diff --git a/LICENSE.MD2P4 b/LICENSE.MD2P4 new file mode 100644 index 00000000..6df0b8a9 --- /dev/null +++ b/LICENSE.MD2P4 @@ -0,0 +1,33 @@ + MD2P4 + Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS + for + Parallel Sparse BLAS v2.0 + (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata + Alfredo Buttari University of Rome Tor Vergata + Daniela di Serafino Second University of Naples + Pasqua D'Ambra ICAR-CNR + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the MD2P4 group or the names of its contributors may + not be used to endorse or promote products derived from this + software without specific written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS + BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + diff --git a/Make.inc b/Make.inc new file mode 100644 index 00000000..87269cd9 --- /dev/null +++ b/Make.inc @@ -0,0 +1,86 @@ +.mod=.mod +.fh=.fh +.SUFFIXES: .f90 $(.mod) .F90 + + +####################### Section 1 ####################### +# Define your compilers and compiler flags here # +########################################################## +F90=/usr/local/gcc42/bin/gfortran +FC=/usr/local/gcc42/bin/gfortran +F77=$(FC) +CC=/usr/local/gcc42/bin/gcc +F90COPT=-O3 -ggdb +FCOPT=-O3 -ggdb +CCOPT=-O3 -ggdb + +####################### Section 2 ####################### +# Define your linker and linker flags here # +########################################################## +F90LINK=/usr/local/mpich-gcc42/bin/mpif90 +FLINK=/usr/local/mpich-gcc42/bin/mpif77 +MPF90=/usr/local/mpich-gcc42/bin/mpif90 +MPF77=/usr/local/mpich-gcc42/bin/mpif77 +MPCC=/usr/local/mpich-gcc42/bin/mpicc + +####################### Section 3 ####################### +# Specify paths to libraries # +########################################################## +BLAS=-lblas-gcc42 -L$(HOME)/LIB +BLACS=-lmpiblacs-gcc42 -L$(HOME)/LIB + + +####################### Section 4 ####################### +# Other useful tools&defines # +########################################################## +SLUDIR=/usr/local/SuperLU_3.0 +SLU=-lslu_lx_gcc42 -L$(SLUDIR) +SLUDEF=-DHave_SLU_ -I$(SLUDIR) + +UMFDIR=$(HOME)/LIB/Umfpack_gcc41 +UMF=-lumfpack -lamd -L$(UMFDIR) +UMFDEF=-DHave_UMF_ -I$(UMFDIR) +# +# We are using the public domain tool METIS from U. Minnesota. To get it +# check URL http://www.cs.umn.edu:~karypis +# +METIS_LIB = -L$(HOME)/NUMERICAL/metis-4.0 -lmetis +LDLIBS=$(BLACS) $(SLU) $(UMF) $(BLAS) $(METIS_LIB) + +# Add -DLargeFptr for 64-bit addresses +CDEFINES=-DAdd_ $(SLUDEF) $(UMFDEF) +FDEFINES=-DNETLIB_BLACS -DHAVE_MOVE_ALLOC + +AR=ar -cur +RANLIB=ranlib + + + +####################### Section 5 ####################### +# Do not edit this # +########################################################## +LIBDIR=lib +BASELIBNAME=libpsb_base.a +PRECLIBNAME=libpsb_prec.a +METHDLIBNAME=libpsb_krylov.a +UTILLIBNAME=libpsb_util.a + +# Under Linux/gmake there is a rule interpreting .mod as Modula source! +$(.mod).o: + +.f.o: + $(FC) $(FCOPT) $(INCDIRS) -c $< +.c.o: + $(CC) $(CCOPT) $(INCDIRS) $(CDEFINES) -c $< +.f$(.mod): + $(F90) $(FCOPT) $(INCDIRS) -c $< +.f90$(.mod): + $(F90) $(F90COPT) $(INCDIRS) -c $< +.f90.o: + $(F90) $(F90COPT) $(INCDIRS) -c $< +.F90.o: + $(F90) $(F90COPT) $(INCDIRS) $(FDEFINES) -c $< + + + + diff --git a/Make.inc.g95 b/Make.inc.g95 new file mode 100644 index 00000000..7a448b3b --- /dev/null +++ b/Make.inc.g95 @@ -0,0 +1,85 @@ +.mod=.mod +.fh=.fh +.SUFFIXES: .f90 $(.mod) .F90 + + +####################### Section 1 ####################### +# Define your compilers and compiler flags here # +########################################################## +F90=/usr/local/bin/g95 +FC=/usr/local/bin/g95 +F77=$(FC) +F90COPT= -O3 +FCOPT=-O3 +CCOPT=-O3 + +####################### Section 2 ####################### +# Define your linker and linker flags here # +########################################################## +F90LINK=/usr/local/mpich-g95/bin/mpif90 +FLINK=/usr/local/mpich-g95/bin/mpif77 +MPF90=/usr/local/mpich-g95/bin/mpif90 +MPF77=/usr/local/mpich-g95/bin/mpif77 +MPCC=/usr/local/mpich-g95/bin/mpicc + +####################### Section 3 ####################### +# Specify paths to libraries # +########################################################## +BLAS=-lblasg95 -L$(HOME)/LIB +BLACS=-lmpiblacsg95 -L$(HOME)/LIB + + +####################### Section 4 ####################### +# Other useful tools&defines # +########################################################## +#SLUDIR=/usr/local/SuperLU_3.0 +#SLU=-lslu_lx_gfort -L$(SLUDIR) +#SLUDEF=-DHave_SLU_ -I$(SLUDIR) + +#UMFDIR=$(HOME)/LIB/Umfpack_gcc41 +#UMF=-lumfpack -lamd -L$(UMFDIR) +#UMFDEF=-DHave_UMF_ -I$(UMFDIR) + +# Add -DLargeFptr for 64-bit addresses +CDEFINES=-DAdd_ $(SLUDEF) $(UMFDEF) +FDEFINES=-DNORMAL + +AR=ar -cur +RANLIB=ranlib + + + +####################### Section 5 ####################### +# Do not edit this # +########################################################## +LIBDIR = lib +LIBNAME = libpsblas.a + +TYPEMODS = psb_spmat_type$(.mod) psb_descriptor_type$(.mod) psb_prec_type$(.mod) psb_realloc_mod$(.mod) +CONSTMODS = psb_const_mod$(.mod) +BLASMODS = $(TYPEMODS) psb_psblas_mod$(.mod) psb_comm_mod$(.mod) +METHDMODS = psb_methd_mod$(.mod) +TOOLSMODS = $(CONSTMODS) psi_mod$(.mod) psb_tools_mod$(.mod) psb_serial_mod$(.mod) +PRECMODS = psb_prec_mod$(.mod) +ERRORMODS = psb_error_mod$(.mod) +F90MODS= $(BLASMODS) $(PRECMODS) $(METHDMODS) $(TOOLSMODS) $(ERRORMODS) string$(.mod) + +MODS=$(LIBDIR)/psb_const_mod$(.mod) $(LIBDIR)/psb_spmat_type$(.mod) $(LIBDIR)/psb_realloc_mod$(.mod) \ + $(LIBDIR)/psb_descriptor_type$(.mod) $(LIBDIR)/psb_prec_type$(.mod) $(LIBDIR)/parts.fh \ + $(LIBDIR)/psb_serial_mod$(.mod) $(LIBDIR)/psb_comm_mod$(.mod) $(LIBDIR)/psb_error_mod$(.mod) + +# Under Linux/gmake there is a rule interpreting .mod as Modula source! +$(.mod).o: + +.f.o: + $(FC) $(FCOPT) $(INCDIRS) -c $< +.c.o: + $(CC) $(CCOPT) $(INCDIRS) $(CDEFINES) -c $< +.f$(.mod): + $(F90) $(FCOPT) $(INCDIRS) -c $< +.f90$(.mod): + $(F90) $(F90COPT) $(INCDIRS) -c $< +.f90.o: + $(F90) $(F90COPT) $(INCDIRS) -c $< +.F90.o: + $(F90) $(F90COPT) $(INCDIRS) $(FDEFINES) -c $< diff --git a/Make.inc.gcc42 b/Make.inc.gcc42 new file mode 100644 index 00000000..87269cd9 --- /dev/null +++ b/Make.inc.gcc42 @@ -0,0 +1,86 @@ +.mod=.mod +.fh=.fh +.SUFFIXES: .f90 $(.mod) .F90 + + +####################### Section 1 ####################### +# Define your compilers and compiler flags here # +########################################################## +F90=/usr/local/gcc42/bin/gfortran +FC=/usr/local/gcc42/bin/gfortran +F77=$(FC) +CC=/usr/local/gcc42/bin/gcc +F90COPT=-O3 -ggdb +FCOPT=-O3 -ggdb +CCOPT=-O3 -ggdb + +####################### Section 2 ####################### +# Define your linker and linker flags here # +########################################################## +F90LINK=/usr/local/mpich-gcc42/bin/mpif90 +FLINK=/usr/local/mpich-gcc42/bin/mpif77 +MPF90=/usr/local/mpich-gcc42/bin/mpif90 +MPF77=/usr/local/mpich-gcc42/bin/mpif77 +MPCC=/usr/local/mpich-gcc42/bin/mpicc + +####################### Section 3 ####################### +# Specify paths to libraries # +########################################################## +BLAS=-lblas-gcc42 -L$(HOME)/LIB +BLACS=-lmpiblacs-gcc42 -L$(HOME)/LIB + + +####################### Section 4 ####################### +# Other useful tools&defines # +########################################################## +SLUDIR=/usr/local/SuperLU_3.0 +SLU=-lslu_lx_gcc42 -L$(SLUDIR) +SLUDEF=-DHave_SLU_ -I$(SLUDIR) + +UMFDIR=$(HOME)/LIB/Umfpack_gcc41 +UMF=-lumfpack -lamd -L$(UMFDIR) +UMFDEF=-DHave_UMF_ -I$(UMFDIR) +# +# We are using the public domain tool METIS from U. Minnesota. To get it +# check URL http://www.cs.umn.edu:~karypis +# +METIS_LIB = -L$(HOME)/NUMERICAL/metis-4.0 -lmetis +LDLIBS=$(BLACS) $(SLU) $(UMF) $(BLAS) $(METIS_LIB) + +# Add -DLargeFptr for 64-bit addresses +CDEFINES=-DAdd_ $(SLUDEF) $(UMFDEF) +FDEFINES=-DNETLIB_BLACS -DHAVE_MOVE_ALLOC + +AR=ar -cur +RANLIB=ranlib + + + +####################### Section 5 ####################### +# Do not edit this # +########################################################## +LIBDIR=lib +BASELIBNAME=libpsb_base.a +PRECLIBNAME=libpsb_prec.a +METHDLIBNAME=libpsb_krylov.a +UTILLIBNAME=libpsb_util.a + +# Under Linux/gmake there is a rule interpreting .mod as Modula source! +$(.mod).o: + +.f.o: + $(FC) $(FCOPT) $(INCDIRS) -c $< +.c.o: + $(CC) $(CCOPT) $(INCDIRS) $(CDEFINES) -c $< +.f$(.mod): + $(F90) $(FCOPT) $(INCDIRS) -c $< +.f90$(.mod): + $(F90) $(F90COPT) $(INCDIRS) -c $< +.f90.o: + $(F90) $(F90COPT) $(INCDIRS) -c $< +.F90.o: + $(F90) $(F90COPT) $(INCDIRS) $(FDEFINES) -c $< + + + + diff --git a/Make.inc.ifc9 b/Make.inc.ifc9 new file mode 100644 index 00000000..7f319174 --- /dev/null +++ b/Make.inc.ifc9 @@ -0,0 +1,88 @@ +.mod=.mod +.fh=.fh +.SUFFIXES: .f90 $(.mod) .F90 + + +####################### Section 1 ####################### +# Define your compilers and compiler flags here # +########################################################## +IFC9=/opt/intel/fc/9.1.033 +F90=$(IFC9)/bin/ifort +FC=$(IFC9)/bin/ifort +F77=$(FC) +CC=gcc +F90COPT=-O3 +FCOPT=-O3 +CCOPT=-O3 + +####################### Section 2 ####################### +# Define your linker and linker flags here # +########################################################## +F90LINK=/usr/local/mpich-ifc91/bin/mpif90 +FLINK=/usr/local/mpich-ifc91/bin/mpif77 +MPF90=/usr/local/mpich-ifc91/bin/mpif90 +MPF77=/usr/local/mpich-ifc91/bin/mpif77 +MPCC=/usr/local/mpich-ifc91/bin/mpicc + +####################### Section 3 ####################### +# Specify paths to libraries # +########################################################## +BLAS=-lblas-intel -L$(HOME)/NUMERICAL/LIB +BLACS=-lmpiblacs-intel -L$(HOME)/NUMERICAL/LIB + + + +####################### Section 4 ####################### +# Other useful tools&defines # +########################################################## +SLUDIR=/usr/local/SuperLU_3.0 +SLU=-lslu_lx_ifc9 -L$(SLUDIR) +SLUDEF=-DHave_SLU_ -I$(SLUDIR) + +UMFDIR=$(HOME)/LIB/Umfpack_gcc41 +UMF=-lumfpack -lamd -L$(UMFDIR) +UMFDEF=-DHave_UMF_ -I$(UMFDIR) +# +# We are using the public domain tool METIS from U. Minnesota. To get it +# check URL http://www.cs.umn.edu:~karypis +# +METIS_LIB = -L$(HOME)/NUMERICAL/metis-4.0 -lmetis +LDLIBS=$(BLACS) $(SLU) $(UMF) $(BLAS) $(METIS_LIB) + +# Add -DLargeFptr for 64-bit addresses +CDEFINES=-DAdd_ $(SLUDEF) $(UMFDEF) +FDEFINES=-DNETLIB_BLACS -DHAVE_MOVE_ALLOC + +AR=ar -cur +RANLIB=ranlib + + + +####################### Section 5 ####################### +# Do not edit this # +########################################################## +LIBDIR=lib +BASELIBNAME=libpsb_base.a +PRECLIBNAME=libpsb_prec.a +METHDLIBNAME=libpsb_krylov.a +UTILLIBNAME=libpsb_util.a + +# Under Linux/gmake there is a rule interpreting .mod as Modula source! +$(.mod).o: + +.f.o: + $(FC) $(FCOPT) $(INCDIRS) -c $< +.c.o: + $(CC) $(CCOPT) $(INCDIRS) $(CDEFINES) -c $< +.f$(.mod): + $(F90) $(FCOPT) $(INCDIRS) -c $< +.f90$(.mod): + $(F90) $(F90COPT) $(INCDIRS) -c $< +.f90.o: + $(F90) $(F90COPT) $(INCDIRS) -c $< +.F90.o: + $(F90) $(F90COPT) $(INCDIRS) $(FDEFINES) -c $< + + + + diff --git a/Make.inc.rs6k b/Make.inc.rs6k new file mode 100644 index 00000000..b7b8d832 --- /dev/null +++ b/Make.inc.rs6k @@ -0,0 +1,86 @@ +.mod=.mod +.fh=.fh +.SUFFIXES: .f90 $(.mod) .F90 + + +####################### Section 1 ####################### +# Define your compilers and compiler flags here # +########################################################## +F90=xlf95 -qsuffix=f=f90:cpp=F90 +FC=xlf +F77=$(FC) +CC=xlc +F90COPT= -O3 +FCOPT=-O3 +CCOPT=-O3 + +####################### Section 2 ####################### +# Define your linker and linker flags here # +########################################################## +MPF90=mpxlf95 -qsuffix=f=f90 +F90LINK=$(MPF90) +FLINK=$(MPF90) +MPF77=mpxlf95 -qfixed +MPCC=mpxlc + +####################### Section 3 ####################### +# Specify paths to libraries # +########################################################## +BLAS=-lessl +BLACS=-lmpiblacs + + +####################### Section 4 ####################### +# Other useful tools&defines # +########################################################## +#SLUDIR=/usr/local/SuperLU_3.0 +#SLU=-lslu -L$(SLUDIR) +#SLUDEF=-DHave_SLU_ -I$(SLUDIR) + +#UMFDIR=$(HOME)/LIB/Umfpack +#UMF=-lumfpack -lamd -L$(UMFDIR) +#UMFDEF=-DHave_UMF_ -I$(UMFDIR) +# +# We are using the public domain tool METIS from U. Minnesota. To get it +# check URL http://www.cs.umn.edu:~karypis +# +METIS_LIB = -L$(HOME)/NUMERICAL/metis-4.0 -lmetis +LDLIBS=$(BLACS) $(SLU) $(UMF) $(BLAS) $(METIS_LIB) + +# Add -DLargeFptr for 64-bit addresses +CDEFINES=-DNoChange $(SLUDEF) $(UMFDEF) -DLargeFptr +FDEFINES=-WF,-DESSL_BLACS -WF,-DHAVE_MOVE_ALLOC + +AR=ar -cur +RANLIB=ranlib + + + +####################### Section 5 ####################### +# Do not edit this # +########################################################## +LIBDIR=lib +BASELIBNAME=libpsb_base.a +PRECLIBNAME=libpsb_prec.a +METHDLIBNAME=libpsb_krylov.a +UTILLIBNAME=libpsb_util.a + +# Under Linux/gmake there is a rule interpreting .mod as Modula source! +$(.mod).o: + +.f.o: + $(FC) $(FCOPT) $(INCDIRS) -c $< +.c.o: + $(CC) $(CCOPT) $(INCDIRS) $(CDEFINES) -c $< +.f$(.mod): + $(F90) $(FCOPT) $(INCDIRS) -c $< +.f90$(.mod): + $(F90) $(F90COPT) $(INCDIRS) -c $< +.f90.o: + $(F90) $(F90COPT) $(INCDIRS) -c $< +.F90.o: + $(F90) $(F90COPT) $(INCDIRS) $(FDEFINES) -c $< + + + + diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..d93871a1 --- /dev/null +++ b/Makefile @@ -0,0 +1,46 @@ +include ./Make.inc + + +LIBDIR=../psblas2/lib +HERE=. +INCDIRS=-I. -I$(LIBDIR) + +MODOBJS= psb_prec_type.o psb_prec_mod.o +MPFOBJS=psb_dbldaggrmat.o psb_zbldaggrmat.o +F90OBJS=psb_dasmatbld.o psb_dslu_bld.o psb_dumf_bld.o psb_dilu_fct.o\ + psb_dmlprc_bld.o psb_dsp_renum.o psb_dilu_bld.o \ + psb_dprecbld.o psb_dprecfree.o psb_dprecset.o \ + psb_dbaseprc_bld.o psb_ddiagsc_bld.o psb_dgenaggrmap.o \ + psb_dprc_aply.o psb_dmlprc_aply.o \ + psb_dbaseprc_aply.o psb_dbjac_aply.o\ + psb_zasmatbld.o psb_zslu_bld.o psb_zumf_bld.o psb_zilu_fct.o\ + psb_zmlprc_bld.o psb_zsp_renum.o psb_zilu_bld.o \ + psb_zprecbld.o psb_zprecfree.o psb_zprecset.o \ + psb_zbaseprc_bld.o psb_zdiagsc_bld.o psb_zgenaggrmap.o \ + psb_zprc_aply.o psb_zmlprc_aply.o \ + psb_zbaseprc_aply.o psb_zbjac_aply.o\ + $(MPFOBJS) +COBJS=psb_slu_impl.o psb_umf_impl.o psb_zslu_impl.o psb_zumf_impl.o +OBJS=$(F90OBJS) $(COBJS) $(MPFOBJS) $(MODOBJS) + +LIBMOD=psb_prec_mod$(.mod) +LOCAL_MODS=$(LIBMOD) psb_prec_type$(.mod) +LIBNAME=$(PRECLIBNAME) + +lib: mpobjs $(OBJS) + $(AR) $(HERE)/$(LIBNAME) $(OBJS) + $(RANLIB) $(HERE)/$(LIBNAME) + /bin/cp -p $(HERE)/$(LIBNAME) $(LIBDIR) + /bin/cp -p $(LIBMOD) $(LIBDIR) + +$(F90OBJS) $(MPFOBJS): $(MODOBJS) +psb_prec_mod.o: psb_prec_type.o + +mpobjs: + (make $(MPFOBJS) F90="$(MPF90)" F90COPT="$(F90COPT)") + +veryclean: clean + /bin/rm -f $(LIBNAME) + +clean: + /bin/rm -f $(OBJS) $(LOCAL_MODS) diff --git a/README b/README new file mode 100644 index 00000000..e8fd6f93 --- /dev/null +++ b/README @@ -0,0 +1,130 @@ +This directory contains the PSBLAS library, version 2.1.0 + + +Version 1.0 of the library was described in: +S. Filippone, M. Colajanni +PSBLAS: A library for parallel linear algebra computation on sparse matrices +ACM Trans. on Math. Software, 26(4), Dec. 2000, pp. 527-550. + +PLATFORMS: + +The compilation process relies on the choice of an appropriate +Make.inc file; we have tested with AIX XLF, Intel ifc/Linux, Lahey +F95/Linux, Nag f95/Linux, GNU Fortran/Linux. If you succeed in compiling with +other compiler/operating systems please let us know. + + +LINUX: + +There finally exist a GNU Fortran 95 implementation: we are using the +development snapshots from GCC 3.5.0, later 4.1 and 4.2 since July +2004, and it appears to work. The 4.2 version of GNU compilers is now +our reference platform; the official 4.2.0 release is due pretty +soon. It now includes support for ALLOCATABLES. + + +The Lahey version we got access to (6.0 and 6.1) seems to suffer from +spurious extra copies problem; this is most apparent in the matrix +build process. + +For the Intel compilers, we recommend moving to version 9; previous +versions of the library have been compiled with version 7 and 8 of +ifort. + +IBM SP. +The library has been tested on an IBM SP2, SP4 and SP5, with XLC and XLF +compilers, and a version of the BLACS based on MPI. +The setting + F90=xlf90 -qsuffix=f=f90 +in Make.inc.rs6k takes care of the f90 extension. +WARNING: xlf 8.1 introduced a performance bug, whereas a Fortan 90 +code calling a Fortan 77 code would incur spurious array copies; +please make sure your system has the PTF xlf 8102 installed. + + + +UTILITIES +The TEST/Fileread directory contains some utilities to convert to/from +Harwell-Boeing and MatrixMarket file formats. + + +DOCUMENTATION + +See userguide.pdf +Please consult the sample programs, especially TEST/pargen/ppde90.f90. + + +OTHER SOFTWARE CREDITS + +We include our modified implementation of some of the Sparker (serial +sparse BLAS) material, e.g. Jagged diagonal, plus a number of +extensions of our own design. The original file spblas.f can be +downloaded from matisa.cc.rl.ac.uk; of course any bugs in our +implementation are our own to fix. The main reference for the serial +sparse BLAS is: +Duff, I., Marrone, M., Radicati, G., and Vittoli, C. +Level 3 basic linear algebra subprograms for sparse matrices: a user +level interface +ACM Trans. Math. Softw., 23(3), 379-401, 1997. + +In the multilevel preconditioners we use SMMP by Randolph E. Bank and +Craig C. Douglas na.bank@na-net.ornl.gov and +na.cdouglas@na-net.ornl.gov; we wrapped it in a Fortran 95 interface +with dynamic memory allocation. + +To compile and run our software you will need + +1. A working version of MPI + +2. The MPI version of the BLACS from + http://www.netlib.org/blacs/index.html + +3. A version of the BLAS; if you don't have a specific version for + your platform you may try ATLAS available from + http://math-atlas.sourceforge.net/ + +4. We have had good results with the METIS library, from + http://www-users.cs.umn.edu/~karypis/metis/metis/main.html + This is not necessary to compile our library, but the test program + in test/Fileread assumes you have it installed. + +5. For our preconditioners we include interfaces to the following + software packages: + -- SuperLU 3.0 http://crd.lbl.gov/~xiaoye/SuperLU/ + -- UMFPACK 4.4 http://www.cise.ufl.edu/research/sparse/umfpack/ + These are optional, you only need to install them if you actually + want to use them. + + +TODO: +- As soon as TR 15581 and MOVE_ALLOC become available in GNU Fortran + we shall move from POINTERS to ALLOCATABLE for both vectors and + derived data types components. +- The GLOB_TO_LOC array should be changed for large test cases. + + +The PSBLAS team. + + +Contact: Salvatore Filippone salvatore.filippone@uniroma2.it + +Credits for version 2.0: +Salvatore Filippone +Alfredo Buttari + +The MD2P4 multilevel parallel preconditioners contained in directory +src/prec were developed with the contribution of: +Pasqua D'Ambra +Daniela di Serafino +They are still in an early experimental stage, use at your own risk! + +Credits for version 1.0: +Salvatore Filippone +Michele Colajanni +Fabio Cerioni +Stefano Maiolatesi +Dario Pascucci + + + + diff --git a/psb_dasmatbld.f90 b/psb_dasmatbld.f90 new file mode 100644 index 00000000..a15740f2 --- /dev/null +++ b/psb_dasmatbld.f90 @@ -0,0 +1,245 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +!***************************************************************************** +!* * +!* This routine does two things: * +!* 1. Builds the auxiliary descriptor. This is always done even for * +!* Block Jacobi. * +!* 2. Retrieves the remote matrix pieces. * +!* * +!* All of 1. is done under psb_cdovr, which is independent of CSR, and * +!* has been placed in the TOOLS directory because it might be used for * +!* building a descriptor for an extended stencil in a PDE solver without * +!* necessarily applying AS precond. * +!* * +!* * +!* * +!* * +!* * +!***************************************************************************** +Subroutine psb_dasmatbld(ptype,novr,a,blk,desc_data,upd,desc_p,info,outfmt) + + use psb_base_mod + use psb_prec_type + Implicit None + + ! .. Array Arguments .. + integer, intent(in) :: ptype,novr + Type(psb_dspmat_type), Intent(in) :: a + Type(psb_dspmat_type), Intent(inout) :: blk + integer, intent(out) :: info + Type(psb_desc_type), Intent(inout) :: desc_p + Type(psb_desc_type), Intent(in) :: desc_data + Character, Intent(in) :: upd + character(len=5), optional :: outfmt + + + real(kind(1.d0)) :: t1,t2,t3,mpi_wtime + external mpi_wtime + integer icomm + + ! .. Local Scalars .. + Integer :: k, np,me,m,nnzero,& + & ictxt, n_col,ier,n,int_err(5),& + & tot_recv, ircode, n_row,nhalo, nrow_a,err_act + Logical,Parameter :: debug=.false., debugprt=.false. + character(len=20) :: name, ch_err + name='psb_dasmatbld' + if(psb_get_errstatus().ne.0) return + info=0 + call psb_erractionsave(err_act) + + If(debug) Write(0,*)'IN DASMATBLD ', upd + ictxt=desc_data%matrix_data(psb_ctxt_) + Call psb_info(ictxt, me, np) + + tot_recv=0 + + nrow_a = desc_data%matrix_data(psb_n_row_) + nnzero = Size(a%aspk) + n_col = desc_data%matrix_data(psb_n_col_) + nhalo = n_col-nrow_a + + + If (ptype == bja_) Then + ! + ! Block Jacobi. Copy the descriptor, just in case we want to + ! do the renumbering. + ! + If(debug) Write(0,*)' asmatbld calling allocate ' + call psb_sp_all(0,0,blk,1,info) + if(info /= 0) then + info=4010 + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + blk%fida = 'COO' + blk%infoa(psb_nnz_) = 0 + If(debug) Write(0,*)' asmatbld done spallocate' + If (upd == 'F') Then + call psb_cdcpy(desc_data,desc_p,info) + If(debug) Write(0,*)' asmatbld done cdcpy' + if(info /= 0) then + info=4010 + ch_err='psb_cdcpy' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + endif + + Else If (ptype == asm_) Then + + + ! + ! Additive Schwarz variant. + ! + ! + + + if (novr < 0) then + info=3 + int_err(1)=novr + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + if (novr == 0) then + ! + ! This is really just Block Jacobi..... + ! + If(debug) Write(0,*)' asmatbld calling allocate novr=0' + call psb_sp_all(0,0,blk,1,info) + if(info /= 0) then + info=4010 + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + blk%fida='COO' + blk%infoa(psb_nnz_)=0 + if (debug) write(0,*) 'Calling desccpy' + if (upd == 'F') then + call psb_cdcpy(desc_data,desc_p,info) + If(debug) Write(0,*)' asmatbld done cdcpy' + if(info /= 0) then + info=4010 + ch_err='psb_cdcpy' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (debug) write(0,*) 'Early return from asmatbld: P>=3 N_OVR=0' + endif + return + endif + + call psb_get_mpicomm(ictxt,icomm) + + If(debug)Write(0,*)'BEGIN dasmatbld',me,upd,novr + t1 = mpi_wtime() + + If (upd == 'F') Then + ! + ! Build the auiliary descriptor',desc_p%matrix_data(psb_n_row_) + ! + call psb_cdbldovr(a,desc_data,novr,desc_p,info) + if(info /= 0) then + info=4010 + ch_err='psb_cdbldovr' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + Endif + + if(debug) write(0,*) me,' From cdbldovr _:',desc_p%matrix_data(psb_n_row_),& + & desc_p%matrix_data(psb_n_col_) + + + n_row = desc_p%matrix_data(psb_n_row_) + t2 = mpi_wtime() +!!$ open(60+me) +!!$ call psb_cdprt(60+me,desc_p,short=.false.) +!!$ call flush(60+me) +!!$ close(60+me) +!!$ call psb_barrier(ictxt) + if (debug) write(0,*) 'Before sphalo ',blk%fida,blk%m,psb_nnz_,blk%infoa(psb_nnz_) +!!$ ierr = MPE_Log_event( iovrb, 0, "st OVR" ) +!!$ blk%m = n_row-nrow_a +!!$ blk%k = n_row + + if (present(outfmt)) then + if(debug) write(0,*) me,': Calling SPHALO with ',size(blk%ia2) + Call psb_sphalo(a,desc_p,blk,info,outfmt=outfmt) + else + if(debug) write(0,*) me,': Calling SPHALO with ',size(blk%ia2) + Call psb_sphalo(a,desc_p,blk,info) + end if + + + if(info /= 0) then + info=4010 + ch_err='psb_sphalo' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (debug) write(0,*) 'After psb_sphalo ',blk%fida,blk%m,psb_nnz_,blk%infoa(psb_nnz_) +!!$ ierr = MPE_Log_event( iovre, 0, "ed OVR" ) + + t3 = mpi_wtime() + if (debugprt) then + open(40+me) + call psb_csprt(40+me,blk,head='% Ovrlap rows') + close(40+me) + endif + + + End If + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + Return + +End Subroutine psb_dasmatbld + diff --git a/psb_dbaseprc_aply.f90 b/psb_dbaseprc_aply.f90 new file mode 100644 index 00000000..9bf9012b --- /dev/null +++ b/psb_dbaseprc_aply.f90 @@ -0,0 +1,281 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_dbaseprc_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) + ! + ! Compute Y <- beta*Y + alpha*K^-1 X + ! where K is a a basic preconditioner stored in prec + ! + + use psb_base_mod + use psb_prec_type + implicit none + + type(psb_desc_type),intent(in) :: desc_data + type(psb_dbaseprc_type), intent(in) :: prec + real(kind(0.d0)),intent(inout) :: x(:), y(:) + real(kind(0.d0)),intent(in) :: alpha,beta + character(len=1) :: trans + real(kind(0.d0)),target :: work(:) + integer, intent(out) :: info + + ! Local variables + integer :: n_row,n_col, int_err(5) + real(kind(1.d0)), pointer :: ww(:), aux(:), tx(:),ty(:) + character ::diagl, diagu + integer :: ictxt,np,me,i, isz, nrg, err_act + real(kind(1.d0)) :: t1, t2, t3, t4, t5, t6, t7, mpi_wtime + logical,parameter :: debug=.false., debugprt=.false. + external mpi_wtime + character(len=20) :: name, ch_err + + interface psb_bjac_aply + subroutine psb_dbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) + use psb_base_mod + use psb_prec_type + type(psb_desc_type), intent(in) :: desc_data + type(psb_dbaseprc_type), intent(in) :: prec + real(kind(0.d0)),intent(inout) :: x(:), y(:) + real(kind(0.d0)),intent(in) :: alpha,beta + character(len=1) :: trans + real(kind(0.d0)),target :: work(:) + integer, intent(out) :: info + end subroutine psb_dbjac_aply + end interface + + name='psb_dbaseprc_aply' + info = 0 + call psb_erractionsave(err_act) + + ictxt=desc_data%matrix_data(psb_ctxt_) + call psb_info(ictxt, me, np) + + diagl='U' + diagu='U' + + select case(trans) + case('N','n') + case('T','t','C','c') + case default + info=40 + int_err(1)=6 + ch_err(2:2)=trans + goto 9999 + end select + + select case(prec%iprcparm(p_type_)) + + case(noprec_) + + call psb_geaxpby(alpha,x,beta,y,desc_data,info) + + case(diagsc_) + + if (size(work) >= size(x)) then + ww => work + else + allocate(ww(size(x)),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + end if + + n_row=desc_data%matrix_data(psb_n_row_) + ww(1:n_row) = x(1:n_row)*prec%d(1:n_row) + call psb_geaxpby(alpha,ww,beta,y,desc_data,info) + + if (size(work) < size(x)) then + deallocate(ww,stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Deallocate') + goto 9999 + end if + end if + + case(bja_) + + call psb_bjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) + if(info.ne.0) then + info=4010 + ch_err='psb_bjac_aply' + goto 9999 + end if + + case(asm_,ras_,ash_,rash_) + + if (prec%iprcparm(n_ovr_)==0) then + ! shortcut: this fixes performance for RAS(0) == BJA + call psb_bjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) + if(info.ne.0) then + info=4010 + ch_err='psb_bjacaply' + goto 9999 + end if + + else + ! Note: currently trans is unused. + n_row=prec%desc_data%matrix_data(psb_n_row_) + n_col=prec%desc_data%matrix_data(psb_n_col_) + + isz=max(n_row,N_COL) + if ((6*isz) <= size(work)) then + ww => work(1:isz) + tx => work(isz+1:2*isz) + ty => work(2*isz+1:3*isz) + aux => work(3*isz+1:) + else if ((4*isz) <= size(work)) then + aux => work(1:) + allocate(ww(isz),tx(isz),ty(isz),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + else if ((3*isz) <= size(work)) then + ww => work(1:isz) + tx => work(isz+1:2*isz) + ty => work(2*isz+1:3*isz) + allocate(aux(4*isz),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + else + allocate(ww(isz),tx(isz),ty(isz),& + &aux(4*isz),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + endif + + if (debugprt) write(0,*)' vdiag: ',prec%d(:) + if (debug) write(0,*) 'Bi-CGSTAB with Additive Schwarz prec' + + tx(1:desc_data%matrix_data(psb_n_row_)) = x(1:desc_data%matrix_data(psb_n_row_)) + tx(desc_data%matrix_data(psb_n_row_)+1:isz) = dzero + + if (prec%iprcparm(restr_)==psb_halo_) then + call psb_halo(tx,prec%desc_data,info,work=aux) + if(info /=0) then + info=4010 + ch_err='psb_halo' + goto 9999 + end if + else if (prec%iprcparm(restr_) /= psb_none_) then + write(0,*) 'Problem in PRC_APLY: Unknown value for restriction ',& + &prec%iprcparm(restr_) + end if + + if (prec%iprcparm(iren_)>0) then + call dgelp('N',n_row,1,prec%perm,tx,isz,ww,isz,info) + if(info /=0) then + info=4010 + ch_err='psb_dgelp' + goto 9999 + end if + endif + + call psb_bjac_aply(done,prec,tx,dzero,ty,prec%desc_data,trans,aux,info) + if(info.ne.0) then + info=4010 + ch_err='psb_bjac_aply' + goto 9999 + end if + + if (prec%iprcparm(iren_)>0) then + call dgelp('N',n_row,1,prec%invperm,ty,isz,ww,isz,info) + if(info /=0) then + info=4010 + ch_err='psb_dgelp' + goto 9999 + end if + endif + + select case (prec%iprcparm(prol_)) + + case(psb_none_) + ! Would work anyway, but since it's supposed to do nothing... + ! call f90_psovrl(ty,prec%desc_data,update=prec%a_restrict) + + case(psb_sum_,psb_avg_) + call psb_ovrl(ty,prec%desc_data,info,& + & update=prec%iprcparm(prol_),work=aux) + if(info /=0) then + info=4010 + ch_err='psb_ovrl' + goto 9999 + end if + + case default + write(0,*) 'Problem in PRC_APLY: Unknown value for prolongation ',& + & prec%iprcparm(prol_) + end select + + call psb_geaxpby(alpha,ty,beta,y,desc_data,info) + + + if ((6*isz) <= size(work)) then + else if ((4*isz) <= size(work)) then + deallocate(ww,tx,ty) + else if ((3*isz) <= size(work)) then + deallocate(aux) + else + deallocate(ww,aux,tx,ty) + endif + end if + case default + write(0,*) 'Invalid PRE%PREC ',prec%iprcparm(p_type_),':',& + & min_prec_,noprec_,diagsc_,bja_,& + & asm_,ras_,ash_,rash_ + end select + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_errpush(info,name,i_err=int_err,a_err=ch_err) + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_dbaseprc_aply + diff --git a/psb_dbaseprc_bld.f90 b/psb_dbaseprc_bld.f90 new file mode 100644 index 00000000..6b5c326a --- /dev/null +++ b/psb_dbaseprc_bld.f90 @@ -0,0 +1,267 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_dbaseprc_bld(a,desc_a,p,info,upd) + + use psb_base_mod + use psb_prec_type + Implicit None + + type(psb_dspmat_type), target :: a + type(psb_desc_type), intent(in), target :: desc_a + type(psb_dbaseprc_type),intent(inout) :: p + integer, intent(out) :: info + character, intent(in), optional :: upd + + interface psb_diagsc_bld + subroutine psb_ddiagsc_bld(a,desc_data,p,upd,info) + use psb_base_mod + use psb_prec_type + integer, intent(out) :: info + type(psb_dspmat_type), intent(in), target :: a + type(psb_desc_type),intent(in) :: desc_data + type(psb_dbaseprc_type), intent(inout) :: p + character, intent(in) :: upd + end subroutine psb_ddiagsc_bld + end interface + + interface psb_ilu_bld + subroutine psb_dilu_bld(a,desc_data,p,upd,info) + use psb_base_mod + use psb_prec_type + integer, intent(out) :: info + type(psb_dspmat_type), intent(in), target :: a + type(psb_desc_type),intent(in) :: desc_data + type(psb_dbaseprc_type), intent(inout) :: p + character, intent(in) :: upd + end subroutine psb_dilu_bld + end interface + + interface psb_slu_bld + subroutine psb_dslu_bld(a,desc_a,p,info) + use psb_base_mod + use psb_prec_type + implicit none + + type(psb_dspmat_type), intent(inout) :: a + type(psb_desc_type), intent(in) :: desc_a + type(psb_dbaseprc_type), intent(inout) :: p + integer, intent(out) :: info + end subroutine psb_dslu_bld + end interface + + interface psb_umf_bld + subroutine psb_dumf_bld(a,desc_a,p,info) + use psb_base_mod + use psb_prec_type + implicit none + + type(psb_dspmat_type), intent(inout) :: a + type(psb_desc_type), intent(in) :: desc_a + type(psb_dbaseprc_type), intent(inout) :: p + integer, intent(out) :: info + end subroutine psb_dumf_bld + end interface + + ! Local scalars + Integer :: err, nnzero, n_row, n_col,I,j,k,ictxt,& + & me,mycol,np,npcol,mglob,lw, mtype, nrg, nzg, err_act + real(kind(1.d0)) :: temp, real_err(5) + real(kind(1.d0)),pointer :: gd(:), work(:) + integer :: int_err(5) + character :: iupd + + logical, parameter :: debug=.false. + integer,parameter :: iroot=0,iout=60,ilout=40 + character(len=20) :: name, ch_err + + if(psb_get_errstatus().ne.0) return + info=0 + err=0 + call psb_erractionsave(err_act) + name = 'psb_baseprc_bld' + + if (debug) write(0,*) 'Entering baseprc_bld' + info = 0 + int_err(1) = 0 + ictxt = psb_cd_get_context(desc_a) + n_row = psb_cd_get_local_rows(desc_a) + n_col = psb_cd_get_local_cols(desc_a) + mglob = psb_cd_get_global_rows(desc_a) + + if (debug) write(0,*) 'Preconditioner Blacs_gridinfo' + call psb_info(ictxt, me, np) + + if (present(upd)) then + if (debug) write(0,*) 'UPD ', upd + if ((UPD.eq.'F').or.(UPD.eq.'T')) then + IUPD=UPD + else + IUPD='F' + endif + else + IUPD='F' + endif + + ! + ! Should add check to ensure all procs have the same... + ! + ! ALso should define symbolic names for the preconditioners. + ! + + call psb_check_def(p%iprcparm(p_type_),'base_prec',& + & diagsc_,is_legal_base_prec) + +!!$ allocate(p%desc_data,stat=info) +!!$ if (info /= 0) then +!!$ call psb_errpush(4010,name,a_err='Allocate') +!!$ goto 9999 +!!$ end if + + call psb_nullify_desc(p%desc_data) + + select case(p%iprcparm(p_type_)) + case (noprec_) + ! Do nothing. + call psb_cdcpy(desc_a,p%desc_data,info) + if(info /= 0) then + info=4010 + ch_err='psb_cdcpy' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case (diagsc_) + + call psb_diagsc_bld(a,desc_a,p,iupd,info) + if(debug) write(0,*)me,': out of psb_diagsc_bld' + if(info /= 0) then + info=4010 + ch_err='psb_diagsc_bld' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case (bja_,asm_) + + call psb_check_def(p%iprcparm(n_ovr_),'overlap',& + & 0,is_legal_n_ovr) + call psb_check_def(p%iprcparm(restr_),'restriction',& + & psb_halo_,is_legal_restrict) + call psb_check_def(p%iprcparm(prol_),'prolongator',& + & psb_none_,is_legal_prolong) + call psb_check_def(p%iprcparm(iren_),'renumbering',& + & renum_none_,is_legal_renum) + call psb_check_def(p%iprcparm(f_type_),'fact',& + & f_ilu_n_,is_legal_ml_fact) + + if (debug) write(0,*)me, ': Calling PSB_ILU_BLD' + if (debug) call psb_barrier(ictxt) + + select case(p%iprcparm(f_type_)) + + case(f_ilu_n_,f_ilu_e_) + call psb_ilu_bld(a,desc_a,p,iupd,info) + if(debug) write(0,*)me,': out of psb_ilu_bld' + if (debug) call psb_barrier(ictxt) + if(info /= 0) then + info=4010 + ch_err='psb_ilu_bld' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(f_slu_) + + if(debug) write(0,*)me,': calling slu_bld' + call psb_slu_bld(a,desc_a,p,info) + if(info /= 0) then + info=4010 + ch_err='slu_bld' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(f_umf_) + if(debug) write(0,*)me,': calling umf_bld' + call psb_umf_bld(a,desc_a,p,info) + if(debug) write(0,*)me,': Done umf_bld ',info + if (info /= 0) then + info=4010 + ch_err='umf_bld' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(f_none_) + write(0,*) 'Fact=None in BASEPRC_BLD Bja/ASM??' + info=4010 + ch_err='Inconsistent prec f_none_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + + case default + write(0,*) 'Unknown factor type in baseprc_bld bja/asm: ',& + &p%iprcparm(f_type_) + info=4010 + ch_err='Unknown f_type_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end select + case default + info=4010 + ch_err='Unknown p_type_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + + end select + + p%base_a => a + p%base_desc => desc_a + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_dbaseprc_bld + diff --git a/psb_dbjac_aply.f90 b/psb_dbjac_aply.f90 new file mode 100644 index 00000000..f77dcafd --- /dev/null +++ b/psb_dbjac_aply.f90 @@ -0,0 +1,270 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_dbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) + ! + ! Compute Y <- beta*Y + alpha*K^-1 X + ! where K is a a Block Jacobi preconditioner stored in prec + ! Note that desc_data may or may not be the same as prec%desc_data, + ! but since both are INTENT(IN) this should be legal. + ! + + use psb_base_mod + use psb_prec_type + implicit none + + type(psb_desc_type), intent(in) :: desc_data + type(psb_dbaseprc_type), intent(in) :: prec + real(kind(0.d0)),intent(inout) :: x(:), y(:) + real(kind(0.d0)),intent(in) :: alpha,beta + character(len=1) :: trans + real(kind(0.d0)),target :: work(:) + integer, intent(out) :: info + + ! Local variables + integer :: n_row,n_col + real(kind(1.d0)), pointer :: ww(:), aux(:), tx(:),ty(:),tb(:) + character ::diagl, diagu + integer :: ictxt,np,me,i, nrg, err_act, int_err(5) + real(kind(1.d0)) :: t1, t2, t3, t4, t5, t6, t7, mpi_wtime + logical,parameter :: debug=.false., debugprt=.false. + external mpi_wtime + character(len=20) :: name, ch_err + + name='psb_bjac_aply' + info = 0 + call psb_erractionsave(err_act) + + ictxt=psb_cd_get_context(desc_data) + call psb_info(ictxt, me, np) + + diagl='U' + diagu='U' + + select case(trans) + case('N','n') + case('T','t','C','c') + case default + call psb_errpush(40,name) + goto 9999 + end select + + + n_row=desc_data%matrix_data(psb_n_row_) + n_col=desc_data%matrix_data(psb_n_col_) + + if (n_col <= size(work)) then + ww => work(1:n_col) + if ((4*n_col+n_col) <= size(work)) then + aux => work(n_col+1:) + else + allocate(aux(4*n_col),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + endif + else + allocate(ww(n_col),aux(4*n_col),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + endif + + + if (prec%iprcparm(jac_sweeps_) == 1) then + + + select case(prec%iprcparm(f_type_)) + case(f_ilu_n_,f_ilu_e_) + + select case(trans) + case('N','n') + + call psb_spsm(done,prec%av(l_pr_),x,dzero,ww,desc_data,info,& + & trans='N',unit=diagl,choice=psb_none_,work=aux) + if(info /=0) goto 9999 + ww(1:n_row) = ww(1:n_row)*prec%d(1:n_row) + call psb_spsm(alpha,prec%av(u_pr_),ww,beta,y,desc_data,info,& + & trans='N',unit=diagu,choice=psb_none_, work=aux) + if(info /=0) goto 9999 + + case('T','t','C','c') + call psb_spsm(done,prec%av(u_pr_),x,dzero,ww,desc_data,info,& + & trans=trans,unit=diagu,choice=psb_none_, work=aux) + if(info /=0) goto 9999 + ww(1:n_row) = ww(1:n_row)*prec%d(1:n_row) + call psb_spsm(alpha,prec%av(l_pr_),ww,beta,y,desc_data,info,& + & trans=trans,unit=diagl,choice=psb_none_,work=aux) + if(info /=0) goto 9999 + + end select + + case(f_slu_) + + ww(1:n_row) = x(1:n_row) + + select case(trans) + case('N','n') + call psb_dslu_solve(0,n_row,1,ww,n_row,prec%iprcparm(slu_ptr_),info) + case('T','t','C','c') + call psb_dslu_solve(1,n_row,1,ww,n_row,prec%iprcparm(slu_ptr_),info) + end select + + if(info /=0) goto 9999 + call psb_geaxpby(alpha,ww,beta,y,desc_data,info) + + case (f_umf_) + + + select case(trans) + case('N','n') + call psb_dumf_solve(0,n_row,ww,x,n_row,prec%iprcparm(umf_numptr_),info) + case('T','t','C','c') + call psb_dumf_solve(1,n_row,ww,x,n_row,prec%iprcparm(umf_numptr_),info) + end select + + if(info /=0) goto 9999 + + call psb_geaxpby(alpha,ww,beta,y,desc_data,info) + + case default + write(0,*) 'Unknown factorization type in bjac_aply',prec%iprcparm(f_type_) + end select + if (debugprt) write(0,*)' Y: ',y(:) + + else if (prec%iprcparm(jac_sweeps_) > 1) then + + ! Note: we have to add TRANS to this one !!!!!!!!! + + if (size(prec%av) < ap_nd_) then + info = 4011 + goto 9999 + endif + + allocate(tx(n_col),ty(n_col),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + tx = dzero + ty = dzero + select case(prec%iprcparm(f_type_)) + case(f_ilu_n_,f_ilu_e_) + do i=1, prec%iprcparm(jac_sweeps_) + ! X(k+1) = M^-1*(b-N*X(k)) + ty(1:n_row) = x(1:n_row) + call psb_spmm(-done,prec%av(ap_nd_),tx,done,ty,& + & prec%desc_data,info,work=aux) + if(info /=0) goto 9999 + call psb_spsm(done,prec%av(l_pr_),ty,dzero,ww,& + & prec%desc_data,info,& + & trans='N',unit='U',choice=psb_none_,work=aux) + if(info /=0) goto 9999 + ww(1:n_row) = ww(1:n_row)*prec%d(1:n_row) + call psb_spsm(done,prec%av(u_pr_),ww,dzero,tx,& + & prec%desc_data,info,& + & trans='N',unit='U',choice=psb_none_,work=aux) + if(info /=0) goto 9999 + end do + + case(f_slu_) + do i=1, prec%iprcparm(jac_sweeps_) + ! X(k+1) = M^-1*(b-N*X(k)) + ty(1:n_row) = x(1:n_row) + call psb_spmm(-done,prec%av(ap_nd_),tx,done,ty,& + & prec%desc_data,info,work=aux) + if(info /=0) goto 9999 + + call psb_dslu_solve(0,n_row,1,ty,n_row,prec%iprcparm(slu_ptr_),info) + if(info /=0) goto 9999 + tx(1:n_row) = ty(1:n_row) + end do + case(f_umf_) + do i=1, prec%iprcparm(jac_sweeps_) + ! X(k+1) = M^-1*(b-N*X(k)) + ty(1:n_row) = x(1:n_row) + call psb_spmm(-done,prec%av(ap_nd_),tx,done,ty,& + & prec%desc_data,info,work=aux) + if(info /=0) goto 9999 + + call psb_dumf_solve(0,n_row,ww,ty,n_row,& + & prec%iprcparm(umf_numptr_),info) + if(info /=0) goto 9999 + tx(1:n_row) = ww(1:n_row) + end do + + end select + + call psb_geaxpby(alpha,tx,beta,y,desc_data,info) + + + deallocate(tx,ty) + + + else + + goto 9999 + + endif + + if (n_col <= size(work)) then + if ((4*n_col+n_col) <= size(work)) then + else + deallocate(aux) + endif + else + deallocate(ww,aux) + endif + + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_errpush(info,name,i_err=int_err,a_err=ch_err) + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_dbjac_aply + diff --git a/psb_dbldaggrmat.f90 b/psb_dbldaggrmat.f90 new file mode 100644 index 00000000..9f599385 --- /dev/null +++ b/psb_dbldaggrmat.f90 @@ -0,0 +1,1043 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_dbldaggrmat(a,desc_a,ac,desc_ac,p,info) + use psb_base_mod + use psb_prec_type + implicit none + + type(psb_dspmat_type), intent(in), target :: a + type(psb_dspmat_type), intent(inout), target :: ac + type(psb_desc_type), intent(in) :: desc_a + type(psb_desc_type), intent(inout) :: desc_ac + type(psb_dbaseprc_type), intent(inout), target :: p + integer, intent(out) :: info + + logical, parameter :: aggr_dump=.false. + integer ::ictxt,np,me, err_act + character(len=20) :: name, ch_err + name='psb_dbldaggrmat' + if(psb_get_errstatus().ne.0) return + info=0 + call psb_erractionsave(err_act) + + ictxt=psb_cd_get_context(desc_a) + + call psb_info(ictxt, me, np) + + select case (p%iprcparm(smth_kind_)) + case (no_smth_) + + call raw_aggregate(info) + + if(info /= 0) then + call psb_errpush(4010,name,a_err='raw_aggregate') + goto 9999 + end if + if (aggr_dump) call psb_csprt(90+me,ac,head='% Raw aggregate.') + + case(smth_omg_,smth_biz_) + if (aggr_dump) call psb_csprt(70+me,a,head='% Input matrix') + call smooth_aggregate(info) + + if(info /= 0) then + call psb_errpush(4010,name,a_err='smooth_aggregate') + goto 9999 + end if + if (aggr_dump) call psb_csprt(90+me,ac,head='% Smooth aggregate.') + case default + call psb_errpush(4010,name,a_err=name) + goto 9999 + + end select + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +contains + + subroutine raw_aggregate(info) + use psb_base_mod + use psb_prec_type + use mpi + implicit none + + integer, intent(out) :: info + type(psb_dspmat_type) :: b + integer, pointer :: nzbr(:), idisp(:) + integer :: ictxt, nrow, nglob, ncol, ntaggr, nzac, ip, ndx,& + & naggr, np, me, nzt,jl,nzl,nlr,& + & icomm,naggrm1, i, j, k, err_act + + name='raw_aggregate' + if(psb_get_errstatus().ne.0) return + info=0 + call psb_erractionsave(err_act) + + call psb_nullify_sp(b) + + ictxt = psb_cd_get_context(desc_a) + call psb_info(ictxt, me, np) + nglob = psb_cd_get_global_rows(desc_a) + nrow = psb_cd_get_local_rows(desc_a) + ncol = psb_cd_get_local_cols(desc_a) + + naggr = p%nlaggr(me+1) + ntaggr = sum(p%nlaggr) + allocate(nzbr(np), idisp(np),stat=info) + + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + naggrm1=sum(p%nlaggr(1:me)) + + if (p%iprcparm(coarse_mat_) == mat_repl_) then + do i=1, nrow + p%mlia(i) = p%mlia(i) + naggrm1 + end do + call psb_halo(p%mlia,desc_a,info) + end if + + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_halo') + goto 9999 + end if + + nzt = psb_sp_get_nnzeros(a) + + call psb_sp_all(b,nzt,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spall') + goto 9999 + end if + + call psb_sp_setifld(psb_dupl_ovwrt_,psb_dupl_,b,info) + call psb_sp_setifld(psb_upd_dflt_,psb_upd_,b,info) + b%fida = 'COO' + b%m=a%m + b%k=a%k + call psb_csdp(a,b,info) + if(info /= 0) then + info=4010 + ch_err='psb_csdp' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + nzt = psb_sp_get_nnzeros(b) + + j = 0 + do i=1, nzt + if ((1<=b%ia2(i)).and.(b%ia2(i)<=nrow)) then + j = j + 1 + b%aspk(j) = b%aspk(i) + b%ia1(j) = p%mlia(b%ia1(i)) + b%ia2(j) = p%mlia(b%ia2(i)) + end if + enddo + b%infoa(psb_nnz_)=j + call psb_fixcoo(b,info) + + nzt = psb_sp_get_nnzeros(b) + + call psb_sp_reall(b,nzt,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spreall') + goto 9999 + end if + b%m = naggr + b%k = naggr + + if (p%iprcparm(coarse_mat_) == mat_repl_) then + + call psb_cdrep(ntaggr,ictxt,desc_ac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_cdrep') + goto 9999 + end if + + nzbr(:) = 0 + nzbr(me+1) = nzt + call psb_sum(ictxt,nzbr(1:np)) + nzac = sum(nzbr) + call psb_sp_all(ntaggr,ntaggr,ac,nzac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spall') + goto 9999 + end if + + call psb_get_mpicomm(ictxt,icomm) + do ip=1,np + idisp(ip) = sum(nzbr(1:ip-1)) + enddo + ndx = nzbr(me+1) + + call mpi_allgatherv(b%aspk,ndx,mpi_double_precision,ac%aspk,nzbr,idisp,& + & mpi_double_precision,icomm,info) + call mpi_allgatherv(b%ia1,ndx,mpi_integer,ac%ia1,nzbr,idisp,& + & mpi_integer,icomm,info) + call mpi_allgatherv(b%ia2,ndx,mpi_integer,ac%ia2,nzbr,idisp,& + & mpi_integer,icomm,info) + if(info /= 0) then + info=-1 + call psb_errpush(info,name) + goto 9999 + end if + + ac%m = ntaggr + ac%k = ntaggr + ac%infoa(psb_nnz_) = nzac + ac%fida='COO' + ac%descra='G' + call psb_fixcoo(ac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='sp_free') + goto 9999 + end if + + else if (p%iprcparm(coarse_mat_) == mat_distr_) then + + call psb_cdall(ictxt,desc_ac,info,nl=naggr) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_cdall') + goto 9999 + end if + call psb_cdasb(desc_ac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_cdasb') + goto 9999 + end if + + call psb_sp_clone(b,ac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spclone') + goto 9999 + end if + call psb_sp_free(b,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='sp_free') + goto 9999 + end if + + !if(.not.associated(p%av(ap_nd_)%aspk)) p%iprcparm(jac_sweeps_) = 1 + !------------------------------------------------------------------ + ! Split AC=M+N N off-diagonal part + call psb_sp_all(ac%m,ac%k,p%av(ap_nd_),nzl,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_sp_all') + goto 9999 + end if + if(.not.allocated(p%av(ap_nd_)%aspk)) write(0,*) '.not.associated(p%av(ap_nd_)%ia1)' + if(.not.allocated(p%av(ap_nd_)%ia1)) write(0,*) '.not.associated(p%av(ap_nd_)%ia1)' + !write(0,*) 'ok line 238' + + k=0 + do i=1,nzl + if (ac%ia2(i)>ac%m) then + k = k + 1 + p%av(ap_nd_)%aspk(k) = ac%aspk(i) + p%av(ap_nd_)%ia1(k) = ac%ia1(i) + p%av(ap_nd_)%ia2(k) = ac%ia2(i) + endif + enddo + p%av(ap_nd_)%infoa(psb_nnz_) = k + + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_ipcoo2csr') + goto 9999 + end if + call psb_sum(ictxt,k) + + if (k == 0) then + ! If the off diagonal part is emtpy, there's no point + ! in doing multiple Jacobi sweeps. This is certain + ! to happen when running on a single processor. + p%iprcparm(jac_sweeps_) = 1 + end if + !write(0,*) 'operations in bldaggrmat are ok !' + !------------------------------------------------------------------ + + call psb_ipcoo2csr(p%av(ap_nd_),info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='ipcoo2csr') + goto 9999 + end if + + else + + write(0,*) 'Unknown p%iprcparm(coarse_mat) in aggregate_sp',p%iprcparm(coarse_mat_) + end if + + call psb_ipcoo2csr(ac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='ipcoo2csr') + goto 9999 + end if + + deallocate(nzbr,idisp) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + + end subroutine raw_aggregate + + + + subroutine smooth_aggregate(info) + use psb_base_mod + use psb_prec_type + use mpi + implicit none + + integer, intent(out) :: info + + type(psb_dspmat_type) :: b + integer, pointer :: nzbr(:), idisp(:), ivall(:) + integer :: ictxt, nrow, nglob, ncol, ntaggr, nzac, ip, ndx,& + & naggr, np, me, & + & icomm, naggrm1,naggrp1,i,j,err_act,k,nzl + type(psb_dspmat_type), pointer :: am1,am2 + type(psb_dspmat_type) :: am3,am4 + logical :: ml_global_nmb + + logical, parameter :: test_dump=.false.,debug=.false. + integer, parameter :: ncmax=16 + real(kind(1.d0)) :: omega, anorm, tmp, dg + character(len=20) :: name + + + name='smooth_aggregate' + if(psb_get_errstatus().ne.0) return + info=0 + call psb_erractionsave(err_act) + + ictxt = psb_cd_get_context(desc_a) + + call psb_info(ictxt, me, np) + + call psb_nullify_sp(b) + call psb_nullify_sp(am3) + call psb_nullify_sp(am4) + + am2 => p%av(sm_pr_t_) + am1 => p%av(sm_pr_) + + nglob = psb_cd_get_global_rows(desc_a) + nrow = psb_cd_get_local_rows(desc_a) + ncol = psb_cd_get_local_cols(desc_a) + + naggr = p%nlaggr(me+1) + ntaggr = sum(p%nlaggr) + + allocate(nzbr(np), idisp(np),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + + naggrm1 = sum(p%nlaggr(1:me)) + naggrp1 = sum(p%nlaggr(1:me+1)) + + ml_global_nmb = ( (p%iprcparm(smth_kind_) == smth_omg_).or.& + & ( (p%iprcparm(smth_kind_) == smth_biz_).and.& + & (p%iprcparm(coarse_mat_) == mat_repl_)) ) + + + if (ml_global_nmb) then + p%mlia(1:nrow) = p%mlia(1:nrow) + naggrm1 + call psb_halo(p%mlia,desc_a,info) + + if(info /= 0) then + call psb_errpush(4010,name,a_err='f90_pshalo') + goto 9999 + end if + end if + + if (aggr_dump) then + open(30+me) + write(30+me,*) '% Aggregation map' + do i=1,ncol + write(30+me,*) i,p%mlia(i) + end do + close(30+me) + end if + + ! naggr: number of local aggregates + ! nrow: local rows. + ! + allocate(p%dorig(nrow),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + ! Get diagonal D + call psb_sp_getdiag(a,p%dorig,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='sp_getdiag') + goto 9999 + end if + + do i=1,size(p%dorig) + if (p%dorig(i) /= dzero) then + p%dorig(i) = done / p%dorig(i) + else + p%dorig(i) = done + end if + end do + + ! where (p%dorig /= dzero) + ! p%dorig = done / p%dorig + ! elsewhere + ! p%dorig = done + ! end where + + + ! 1. Allocate Ptilde in sparse matrix form + am4%fida='COO' + am4%m=ncol + if (ml_global_nmb) then + am4%k=ntaggr + call psb_sp_all(ncol,ntaggr,am4,ncol,info) + else + am4%k=naggr + call psb_sp_all(ncol,naggr,am4,ncol,info) + endif + if(info /= 0) then + call psb_errpush(4010,name,a_err='spall') + goto 9999 + end if + + if (ml_global_nmb) then + do i=1,ncol + am4%aspk(i) = done + am4%ia1(i) = i + am4%ia2(i) = p%mlia(i) + end do + am4%infoa(psb_nnz_) = ncol + else + do i=1,nrow + am4%aspk(i) = done + am4%ia1(i) = i + am4%ia2(i) = p%mlia(i) + end do + am4%infoa(psb_nnz_) = nrow + endif + + + + + call psb_ipcoo2csr(am4,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='ipcoo2csr') + goto 9999 + end if + + call psb_sp_clone(a,am3,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spclone') + goto 9999 + end if + + ! + ! WARNING: the cycles below assume that AM3 does have + ! its diagonal elements stored explicitly!!! + ! Should we switch to something safer? + ! + call psb_sp_scal(am3,p%dorig,info) + if(info /= 0) goto 9999 + + if (p%iprcparm(om_choice_) == lib_choice_) then + + if (p%iprcparm(smth_kind_) == smth_biz_) then + + ! + ! This only works with CSR. + ! + anorm = dzero + dg = done + do i=1,am3%m + tmp = dzero + do j=am3%ia2(i),am3%ia2(i+1)-1 + if (am3%ia1(j) <= am3%m) then + tmp = tmp + dabs(am3%aspk(j)) + endif + if (am3%ia1(j) == i ) then + dg = dabs(am3%aspk(j)) + end if + end do + anorm = max(anorm,tmp/dg) + enddo + + call psb_amx(ictxt,anorm) + else + anorm = psb_spnrmi(am3,desc_a,info) + endif + omega = 4.d0/(3.d0*anorm) + p%dprcparm(smooth_omega_) = omega + + else if (p%iprcparm(om_choice_) == user_choice_) then + + omega = p%dprcparm(smooth_omega_) + + else if (p%iprcparm(om_choice_) /= user_choice_) then + write(0,*) me,'Error: invalid choice for OMEGA in blaggrmat?? ',& + & p%iprcparm(om_choice_) + end if + + + if (am3%fida=='CSR') then + do i=1,am3%m + do j=am3%ia2(i),am3%ia2(i+1)-1 + if (am3%ia1(j) == i) then + am3%aspk(j) = done - omega*am3%aspk(j) + else + am3%aspk(j) = - omega*am3%aspk(j) + end if + end do + end do + else if (am3%fida=='COO') then + do j=1,am3%infoa(psb_nnz_) + if (am3%ia1(j) /= am3%ia2(j)) then + am3%aspk(j) = - omega*am3%aspk(j) + else + am3%aspk(j) = done - omega*am3%aspk(j) + endif + end do + call psb_ipcoo2csr(am3,info) + else + write(0,*) 'Missing implementation of I sum' + call psb_errpush(4010,name) + goto 9999 + end if + + if (test_dump) then + open(30+me) + write(30+me,*) 'OMEGA: ',omega + do i=1,size(p%dorig) + write(30+me,*) p%dorig(i) + end do + close(30+me) + end if + + if (test_dump) call & + & psb_csprt(20+me,am4,head='% Operator Ptilde.',ivr=desc_a%loc_to_glob) + if (test_dump) call psb_csprt(40+me,am3,head='% (I-wDA)',ivr=desc_a%loc_to_glob,& + & ivc=desc_a%loc_to_glob) + if (debug) write(0,*) me,'Done gather, going for SYMBMM 1' + ! + ! Symbmm90 does the allocation for its result. + ! + ! am1 = (i-wDA)Ptilde + ! Doing it this way means to consider diag(Ai) + ! + ! + call psb_symbmm(am3,am4,am1,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='symbmm 1') + goto 9999 + end if + + call psb_numbmm(am3,am4,am1) + + if (debug) write(0,*) me,'Done NUMBMM 1' + + call psb_sp_free(am4,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='sp_free') + goto 9999 + end if + + if (ml_global_nmb) then + ! + ! Now we have to gather the halo of am1, and add it to itself + ! to multiply it by A, + ! + call psb_sphalo(am1,desc_a,am4,info,clcnv=.false.) + + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_sphalo') + goto 9999 + end if + + call psb_rwextd(ncol,am1,info,b=am4) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_rwextd') + goto 9999 + end if + + call psb_sp_free(am4,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_sp_free') + goto 9999 + end if + + else + + call psb_rwextd(ncol,am1,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='rwextd') + goto 9999 + end if + endif + + if (test_dump) & + & call psb_csprt(60+me,am1,head='% (I-wDA)Pt',ivr=desc_a%loc_to_glob) + + call psb_symbmm(a,am1,am3,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='symbmm 2') + goto 9999 + end if + + call psb_numbmm(a,am1,am3) + if (debug) write(0,*) me,'Done NUMBMM 2' + + if (p%iprcparm(smth_kind_) == smth_omg_) then + call psb_transp(am1,am2,fmt='COO') + nzl = am2%infoa(psb_nnz_) + i=0 + ! + ! Now we have to fix this. The only rows of B that are correct + ! are those corresponding to "local" aggregates, i.e. indices in p%mlia(:) + ! + do k=1, nzl + if ((naggrm1 < am2%ia1(k)) .and.(am2%ia1(k) <= naggrp1)) then + i = i+1 + am2%aspk(i) = am2%aspk(k) + am2%ia1(i) = am2%ia1(k) + am2%ia2(i) = am2%ia2(k) + end if + end do + + am2%infoa(psb_nnz_) = i + call psb_ipcoo2csr(am2,info) + else + call psb_transp(am1,am2) + endif + if (debug) write(0,*) me,'starting sphalo/ rwxtd' + + if (p%iprcparm(smth_kind_) == smth_omg_) then + ! am2 = ((i-wDA)Ptilde)^T + call psb_sphalo(am3,desc_a,am4,info,clcnv=.false.) + + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_sphalo') + goto 9999 + end if + call psb_rwextd(ncol,am3,info,b=am4) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_rwextd') + goto 9999 + end if + call psb_sp_free(am4,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_sp_free') + goto 9999 + end if + + else if (p%iprcparm(smth_kind_) == smth_biz_) then + + call psb_rwextd(ncol,am3,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_rwextd') + goto 9999 + end if + endif + + if (debug) write(0,*) me,'starting symbmm 3' + call psb_symbmm(am2,am3,b,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='symbmm 3') + goto 9999 + end if + + if (debug) write(0,*) me,'starting numbmm 3' + call psb_numbmm(am2,am3,b) + if (debug) write(0,*) me,'Done NUMBMM 3' + +!!$ if (aggr_dump) call csprt(50+me,am1,head='% Operator PTrans.') + call psb_sp_free(am3,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_sp_free') + goto 9999 + end if + + call psb_ipcsr2coo(b,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='ipcsr2coo') + goto 9999 + end if + + call psb_fixcoo(b,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='fixcoo') + goto 9999 + end if + + + if (test_dump) call psb_csprt(80+me,b,head='% Smoothed aggregate AC.') + + select case(p%iprcparm(smth_kind_)) + + case(smth_omg_) + + select case(p%iprcparm(coarse_mat_)) + + case(mat_distr_) + + call psb_sp_clone(b,ac,info) + if(info /= 0) goto 9999 + nzac = ac%infoa(psb_nnz_) + nzl = ac%infoa(psb_nnz_) + + allocate(ivall(ntaggr),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + i = 1 + do ip=1,np + do k=1, p%nlaggr(ip) + ivall(i) = ip + i = i + 1 + end do + end do + + call psb_cdall(ictxt,desc_ac,info,vg=ivall(1:ntaggr),flag=1) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_cdall') + goto 9999 + end if + + + call psb_cdins(nzl,ac%ia1,ac%ia2,desc_ac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_cdins') + goto 9999 + end if + + if (debug) write(0,*) me,'Created aux descr. distr.' + call psb_cdasb(desc_ac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_cdasb') + goto 9999 + end if + + + if (debug) write(0,*) me,'Asmbld aux descr. distr.' + + call psb_glob_to_loc(ac%ia1(1:nzl),desc_ac,info,iact='I') + if(info /= 0) then + call psb_errpush(4010,name,a_err='psglob_to_loc') + goto 9999 + end if + + + call psb_glob_to_loc(ac%ia2(1:nzl),desc_ac,info,iact='I') + if(info /= 0) then + call psb_errpush(4010,name,a_err='psglob_to_loc') + goto 9999 + end if + + + ac%m=desc_ac%matrix_data(psb_n_row_) + ac%k=desc_ac%matrix_data(psb_n_col_) + ac%fida='COO' + ac%descra='G' + + call psb_sp_free(b,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_sp_free') + goto 9999 + end if + + + deallocate(ivall,nzbr,idisp) + + ! Split AC=M+N N off-diagonal part + call psb_sp_all(ac%m,ac%k,p%av(ap_nd_),nzl,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_sp_all') + goto 9999 + end if + + k=0 + do i=1,nzl + if (ac%ia2(i)>ac%m) then + k = k + 1 + p%av(ap_nd_)%aspk(k) = ac%aspk(i) + p%av(ap_nd_)%ia1(k) = ac%ia1(i) + p%av(ap_nd_)%ia2(k) = ac%ia2(i) + endif + enddo + p%av(ap_nd_)%infoa(psb_nnz_) = k + call psb_ipcoo2csr(p%av(ap_nd_),info) + + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_ipcoo2csr') + goto 9999 + end if + call psb_sum(ictxt,k) + + if (k == 0) then + ! If the off diagonal part is emtpy, there's no point + ! in doing multiple Jacobi sweeps. This is certain + ! to happen when running on a single processor. + p%iprcparm(jac_sweeps_) = 1 + end if + + + if (np>1) then + nzl = psb_sp_get_nnzeros(am1) + call psb_glob_to_loc(am1%ia1(1:nzl),desc_ac,info,'I') + + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_glob_to_loc') + goto 9999 + end if + endif + am1%k=desc_ac%matrix_data(psb_n_col_) + + if (np>1) then + call psb_ipcsr2coo(am2,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_ipcsr2coo') + goto 9999 + end if + + nzl = am2%infoa(psb_nnz_) + call psb_glob_to_loc(am2%ia1(1:nzl),desc_ac,info,'I') + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_glob_to_loc') + goto 9999 + end if + + call psb_ipcoo2csr(am2,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_ipcoo2csr') + goto 9999 + end if + end if + am2%m=desc_ac%matrix_data(psb_n_col_) + + if (debug) write(0,*) me,'Done ac ' + case(mat_repl_) + ! + ! + call psb_cdrep(ntaggr,ictxt,desc_ac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_cdrep') + goto 9999 + end if + + nzbr(:) = 0 + nzbr(me+1) = b%infoa(psb_nnz_) + + call psb_sum(ictxt,nzbr(1:np)) + nzac = sum(nzbr) + call psb_sp_all(ntaggr,ntaggr,ac,nzac,info) + if(info /= 0) goto 9999 + + + call psb_get_mpicomm(ictxt,icomm) + do ip=1,np + idisp(ip) = sum(nzbr(1:ip-1)) + enddo + ndx = nzbr(me+1) + + call mpi_allgatherv(b%aspk,ndx,mpi_double_precision,ac%aspk,nzbr,idisp,& + & mpi_double_precision,icomm,info) + call mpi_allgatherv(b%ia1,ndx,mpi_integer,ac%ia1,nzbr,idisp,& + & mpi_integer,icomm,info) + call mpi_allgatherv(b%ia2,ndx,mpi_integer,ac%ia2,nzbr,idisp,& + & mpi_integer,icomm,info) + if(info /= 0) goto 9999 + + + ac%m = ntaggr + ac%k = ntaggr + ac%infoa(psb_nnz_) = nzac + ac%fida='COO' + ac%descra='G' + call psb_fixcoo(ac,info) + if(info /= 0) goto 9999 + call psb_sp_free(b,info) + if(info /= 0) goto 9999 + if (me==0) then + if (test_dump) call psb_csprt(80+me,ac,head='% Smoothed aggregate AC.') + endif + + deallocate(nzbr,idisp) + + case default + write(0,*) 'Inconsistent input in smooth_new_aggregate' + end select + + + case(smth_biz_) + + select case(p%iprcparm(coarse_mat_)) + + case(mat_distr_) + + call psb_sp_clone(b,ac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spclone') + goto 9999 + end if + call psb_cdall(ictxt,desc_ac,info,nl=naggr) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_cdall') + goto 9999 + end if + + call psb_cdasb(desc_ac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_cdasb') + goto 9999 + end if + + call psb_sp_free(b,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='sp_free') + goto 9999 + end if + + + case(mat_repl_) + ! + ! + + call psb_cdrep(ntaggr,ictxt,desc_ac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_cdrep') + goto 9999 + end if + + nzbr(:) = 0 + nzbr(me+1) = b%infoa(psb_nnz_) + call psb_sum(ictxt,nzbr(1:np)) + nzac = sum(nzbr) + call psb_sp_all(ntaggr,ntaggr,ac,nzac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_sp_all') + goto 9999 + end if + + call psb_get_mpicomm(ictxt,icomm) + do ip=1,np + idisp(ip) = sum(nzbr(1:ip-1)) + enddo + ndx = nzbr(me+1) + + call mpi_allgatherv(b%aspk,ndx,mpi_double_precision,ac%aspk,nzbr,idisp,& + & mpi_double_precision,icomm,info) + call mpi_allgatherv(b%ia1,ndx,mpi_integer,ac%ia1,nzbr,idisp,& + & mpi_integer,icomm,info) + call mpi_allgatherv(b%ia2,ndx,mpi_integer,ac%ia2,nzbr,idisp,& + & mpi_integer,icomm,info) + if(info /= 0) then + info=-1 + call psb_errpush(info,name) + goto 9999 + end if + + + ac%m = ntaggr + ac%k = ntaggr + ac%infoa(psb_nnz_) = nzac + ac%fida='COO' + ac%descra='G' + call psb_fixcoo(ac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_fixcoo') + goto 9999 + end if + call psb_sp_free(b,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_sp_free') + goto 9999 + end if + + end select + deallocate(nzbr,idisp) + + end select + + call psb_ipcoo2csr(ac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_ipcoo2csr') + goto 9999 + end if + + if (debug) write(0,*) me,'Done smooth_aggregate ' + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_errpush(info,name) + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + + + end subroutine smooth_aggregate + +end subroutine psb_dbldaggrmat diff --git a/psb_ddiagsc_bld.f90 b/psb_ddiagsc_bld.f90 new file mode 100644 index 00000000..7c3c6eb0 --- /dev/null +++ b/psb_ddiagsc_bld.f90 @@ -0,0 +1,168 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_ddiagsc_bld(a,desc_a,p,upd,info) + use psb_base_mod + use psb_prec_type + Implicit None + + type(psb_dspmat_type), target :: a + type(psb_desc_type), intent(in) :: desc_a + type(psb_dbaseprc_type),intent(inout) :: p + character, intent(in) :: upd + integer, intent(out) :: info + + + ! Local scalars + Integer :: err, n_row, n_col,I,j,k,ictxt,& + & me,np,mglob,lw, err_act + real(kind(1.d0)),allocatable :: gd(:), work(:) + integer :: int_err(5) + character :: iupd + + logical, parameter :: debug=.false. + integer,parameter :: iroot=0,iout=60,ilout=40 + character(len=20) :: name, ch_err + + if(psb_get_errstatus().ne.0) return + info=0 + err=0 + call psb_erractionsave(err_act) + name = 'psb_diagsc_bld' + + if (debug) write(0,*) 'Entering diagsc_bld' + info = 0 + int_err(1) = 0 + ictxt = psb_cd_get_context(desc_a) + n_row = psb_cd_get_local_rows(desc_a) + n_col = psb_cd_get_local_cols(desc_a) + mglob = psb_cd_get_global_rows(desc_a) + + if (debug) write(0,*) 'Preconditioner Blacs_gridinfo' + call psb_info(ictxt, me, np) + + if (debug) write(0,*) 'Precond: Diagonal scaling' + ! diagonal scaling + + call psb_realloc(n_col,p%d,info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='psb_realloc') + goto 9999 + end if + + call psb_csrws(p%d,a,info,trans='N') + if(info /= 0) then + info=4010 + ch_err='psb_csrws' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call psb_cdcpy(desc_a,p%desc_Data,info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='psb_cdcpy') + goto 9999 + end if + + if (debug) write(ilout+me,*) 'VDIAG ',n_row + do i=1,n_row + if (p%d(i).eq.dzero) then + p%d(i) = done + else + p%d(i) = done/p%d(i) + endif + + if (debug) write(ilout+me,*) i,desc_a%loc_to_glob(i), p%d(i) + if (p%d(i).lt.0.d0) then + write(0,*) me,'Negative RWS? ',i,p%d(i) + endif + end do + if (a%pl(1) /= 0) then + allocate(work(n_row),stat=info) + if (info /= 0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + end if + call psb_gelp('n',a%pl,p%d,desc_a,info) + if(info /= 0) then + info=4010 + ch_err='psb_dgelp' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + deallocate(work) + endif + + if (debug) then + allocate(gd(mglob),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + call psb_gather(gd, p%d, desc_a, info, iroot=iroot) + if(info /= 0) then + info=4010 + ch_err='psb_dgatherm' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (me.eq.iroot) then + write(iout+np,*) 'VDIAG CHECK ',mglob + do i=1,mglob + write(iout+np,*) i,gd(i) + enddo + endif + deallocate(gd) + endif + if (debug) write(*,*) 'Preconditioner DIAG computed OK' + + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_ddiagsc_bld + diff --git a/psb_dgenaggrmap.f90 b/psb_dgenaggrmap.f90 new file mode 100644 index 00000000..245bb453 --- /dev/null +++ b/psb_dgenaggrmap.f90 @@ -0,0 +1,292 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_dgenaggrmap(aggr_type,a,desc_a,nlaggr,ilaggr,info) + use psb_base_mod + use psb_prec_type + implicit none + integer, intent(in) :: aggr_type + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + integer, allocatable :: ilaggr(:),nlaggr(:) + integer, intent(out) :: info + ! Locals + integer, allocatable :: ils(:), neigh(:) + integer :: icnt,nlp,k,n,ia,isz,nr, naggr,i,j,m + + logical :: recovery + logical, parameter :: debug=.false. + integer ::ictxt,np,me,err_act + integer :: nrow, ncol, n_ne + integer, parameter :: one=1, two=2 + character(len=20) :: name, ch_err + + if(psb_get_errstatus().ne.0) return + info=0 + name = 'psb_bldaggrmat' + call psb_erractionsave(err_act) + ! + ! Note. At the time being we are ignoring aggr_type + ! so that we only have local decoupled aggregation. This might + ! change in the future. + ! + ictxt=psb_cd_get_context(desc_a) + call psb_info(ictxt,me,np) + nrow = psb_cd_get_local_rows(desc_a) + ncol = psb_cd_get_local_cols(desc_a) + + nr = a%m + allocate(ilaggr(nr),neigh(nr),stat=info) + if(info.ne.0) then + info=4000 + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + do i=1, nr + ilaggr(i) = -(nr+1) + end do + ! Note: -(nr+1) Untouched as yet + ! -i 1<=i<=nr Adjacent to aggregate i + ! i 1<=i<=nr Belonging to aggregate i + + ! + ! Phase one: group nodes together. + ! Very simple minded strategy. + ! + naggr = 0 + nlp = 0 + do + icnt = 0 + do i=1, nr + if (ilaggr(i) == -(nr+1)) then + ! + ! 1. Untouched nodes are marked >0 together + ! with their neighbours + ! + icnt = icnt + 1 + naggr = naggr + 1 + ilaggr(i) = naggr + + call psb_neigh(a,i,neigh,n_ne,info,lev=one) + if (info/=0) then + info=4010 + ch_err='psb_neigh' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + do k=1, n_ne + j = neigh(k) + if ((1<=j).and.(j<=nr)) then + ilaggr(j) = naggr +!!$ if (ilaggr(j) < 0) ilaggr(j) = naggr +!!$ if (ilaggr(j) == -(nr+1)) ilaggr(j) = naggr + endif + enddo + ! + ! 2. Untouched neighbours of these nodes are marked <0. + ! + call psb_neigh(a,i,neigh,n_ne,info,lev=two) + if (info/=0) then + info=4010 + ch_err='psb_neigh' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + do n = 1, n_ne + m = neigh(n) + if ((1<=m).and.(m<=nr)) then + if (ilaggr(m) == -(nr+1)) ilaggr(m) = -naggr + endif + enddo + endif + enddo + nlp = nlp + 1 + if (icnt == 0) exit + enddo + if (debug) then + write(0,*) 'Check 1:',count(ilaggr == -(nr+1)),(a%ia1(i),i=a%ia2(1),a%ia2(2)-1) + end if + + ! + ! Phase two: sweep over leftovers. + ! + allocate(ils(naggr+10),stat=info) + if(info.ne.0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + end if + + do i=1, size(ils) + ils(i) = 0 + end do + do i=1, nr + n = ilaggr(i) + if (n>0) then + if (n>naggr) then + write(0,*) 'loc_Aggregate: n > naggr 1 ? ',n,naggr + else + ils(n) = ils(n) + 1 + end if + + end if + end do + if (debug) then + write(0,*) 'Phase 1: number of aggregates ',naggr + write(0,*) 'Phase 1: nodes aggregated ',sum(ils) + end if + + recovery=.false. + do i=1, nr + if (ilaggr(i) < 0) then + ! + ! Now some silly rule to break ties: + ! Group with smallest adjacent aggregate. + ! + isz = nr+1 + ia = -1 + + call psb_neigh(a,i,neigh,n_ne,info,lev=one) + if (info/=0) then + info=4010 + ch_err='psb_neigh' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + do j=1, n_ne + k = neigh(j) + if ((1<=k).and.(k<=nr)) then + n = ilaggr(k) + if (n>0) then + if (n>naggr) then + write(0,*) 'loc_Aggregate: n > naggr 2? ',n,naggr + end if + + if (ils(n) < isz) then + ia = n + isz = ils(n) + endif + endif + endif + enddo + if (ia == -1) then + if (ilaggr(i) > -(nr+1)) then + ilaggr(i) = abs(ilaggr(i)) + if (ilaggr(I)>naggr) then + write(0,*) 'loc_Aggregate: n > naggr 3? ',ilaggr(i),naggr + end if + ils(ilaggr(i)) = ils(ilaggr(i)) + 1 + ! + ! This might happen if the pattern is non symmetric. + ! Need a better handling. + ! + recovery = .true. + else + write(0,*) 'Unrecoverable error !!',ilaggr(i), nr + endif + else + ilaggr(i) = ia + if (ia>naggr) then + write(0,*) 'loc_Aggregate: n > naggr 4? ',ia,naggr + end if + + ils(ia) = ils(ia) + 1 + endif + end if + enddo + if (recovery) then + write(0,*) 'Had to recover from strange situation in loc_aggregate.' + write(0,*) 'Perhaps an unsymmetric pattern?' + endif + if (debug) then + write(0,*) 'Phase 2: number of aggregates ',naggr + write(0,*) 'Phase 2: nodes aggregated ',sum(ils) + do i=1, naggr + write(*,*) 'Size of aggregate ',i,' :',count(ilaggr==i), ils(i) + enddo + write(*,*) maxval(ils(1:naggr)) + write(*,*) 'Leftovers ',count(ilaggr<0), ' in ',nlp,' loops' + end if + +!!$ write(0,*) 'desc_a loc_aggr 4 : ', desc_a%matrix_data(m_) + if (count(ilaggr<0) >0) then + write(0,*) 'Fatal error: some leftovers!!!' + endif + + deallocate(ils,neigh,stat=info) + if (info/=0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + end if + + if (nrow /= size(ilaggr)) then + write(0,*) 'SOmething wrong ilaggr ',nrow,size(ilaggr) + endif + call psb_realloc(ncol,ilaggr,info) + if (info/=0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocate(nlaggr(np),stat=info) + if (info/=0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + end if + + nlaggr(:) = 0 + nlaggr(me+1) = naggr + call psb_sum(ictxt,nlaggr(1:np)) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_dgenaggrmap diff --git a/psb_dilu_bld.f90 b/psb_dilu_bld.f90 new file mode 100644 index 00000000..6744155f --- /dev/null +++ b/psb_dilu_bld.f90 @@ -0,0 +1,366 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +!***************************************************************************** +!* * +!* This is where the action takes place. * +!* ASMATBLD does the setup: building the prec descriptor plus retrieving * +!* matrix rows if needed * +!* * +!* * +!* * +!* * +!* some open code does the renumbering * +!* * +!* * +!* * +!* * +!***************************************************************************** +subroutine psb_dilu_bld(a,desc_a,p,upd,info) + use psb_base_mod + use psb_prec_type + implicit none + ! + ! .. Scalar Arguments .. + integer, intent(out) :: info + ! .. array Arguments .. + type(psb_dspmat_type), intent(in), target :: a + type(psb_dbaseprc_type), intent(inout) :: p + type(psb_desc_type), intent(in) :: desc_a + character, intent(in) :: upd + + ! .. Local Scalars .. + integer :: i, j, jj, k, kk, m + integer :: int_err(5) + character :: trans, unitd + type(psb_dspmat_type) :: blck, atmp + real(kind(1.d0)) :: t1,t2,t3,t4,t5,t6,mpi_wtime, t7, t8 + external mpi_wtime + logical, parameter :: debugprt=.false., debug=.false., aggr_dump=.false. + integer nztota, nztotb, nztmp, nzl, nnr, ir, err_act,& + & n_row, nrow_a,n_col, nhalo, ind, iind, i1,i2,ia + integer :: ictxt,np,me + character(len=20) :: name, ch_err + + interface psb_ilu_fct + subroutine psb_dilu_fct(a,l,u,d,info,blck) + use psb_base_mod + integer, intent(out) :: info + type(psb_dspmat_type),intent(in) :: a + type(psb_dspmat_type),intent(inout) :: l,u + type(psb_dspmat_type),intent(in), optional, target :: blck + real(kind(1.d0)), intent(inout) :: d(:) + end subroutine psb_dilu_fct + end interface + + interface psb_asmatbld + Subroutine psb_dasmatbld(ptype,novr,a,blk,desc_data,upd,desc_p,info,outfmt) + use psb_base_mod + use psb_prec_type + integer, intent(in) :: ptype,novr + Type(psb_dspmat_type), Intent(in) :: a + Type(psb_dspmat_type), Intent(inout) :: blk + Type(psb_desc_type), Intent(inout) :: desc_p + Type(psb_desc_type), Intent(in) :: desc_data + Character, Intent(in) :: upd + integer, intent(out) :: info + character(len=5), optional :: outfmt + end Subroutine psb_dasmatbld + end interface + + interface psb_sp_renum + subroutine psb_dsp_renum(a,desc_a,blck,p,atmp,info) + use psb_base_mod + use psb_prec_type + implicit none + type(psb_dspmat_type), intent(in) :: a,blck + type(psb_dspmat_type), intent(inout) :: atmp + type(psb_dbaseprc_type), intent(inout) :: p + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + end subroutine psb_dsp_renum + end interface + + if(psb_get_errstatus().ne.0) return + info=0 + name='psb_ilu_bld' + call psb_erractionsave(err_act) + + ictxt=psb_cd_get_context(desc_a) + call psb_info(ictxt, me, np) + + m = a%m + if (m < 0) then + info = 10 + int_err(1) = 1 + int_err(2) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + trans = 'N' + unitd = 'U' + if (p%iprcparm(n_ovr_) < 0) then + info = 11 + int_err(1) = 1 + int_err(2) = p%iprcparm(n_ovr_) + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + call psb_nullify_sp(blck) + call psb_nullify_sp(atmp) + + t1= mpi_wtime() + + if(debug) write(0,*)me,': calling psb_asmatbld',p%iprcparm(p_type_),p%iprcparm(n_ovr_) + if (debug) call psb_barrier(ictxt) + call psb_asmatbld(p%iprcparm(p_type_),p%iprcparm(n_ovr_),a,& + & blck,desc_a,upd,p%desc_data,info) + if(info/=0) then + info=4010 + ch_err='psb_asmatbld' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + t2= mpi_wtime() + if (debug) write(0,*)me,': out of psb_asmatbld' + if (debug) call psb_barrier(ictxt) + + if (allocated(p%av)) then + if (size(p%av) < bp_ilu_avsz) then + call psb_errpush(4010,name,a_err='Insufficient av size') + goto 9999 + endif + else + call psb_errpush(4010,name,a_err='AV not associated') + goto 9999 + endif +!!$ call psb_csprt(50+me,a,head='% (A)') + + nrow_a = psb_cd_get_local_rows(desc_a) + nztota = psb_sp_get_nnzeros(a) + nztotb = psb_sp_get_nnzeros(blck) + if (debug) write(0,*)me,': out get_nnzeros',nztota + if (debug) call psb_barrier(ictxt) + + n_col = psb_cd_get_local_cols(desc_a) + nhalo = n_col-nrow_a + n_row = p%desc_data%matrix_data(psb_n_row_) + p%av(l_pr_)%m = n_row + p%av(l_pr_)%k = n_row + p%av(u_pr_)%m = n_row + p%av(u_pr_)%k = n_row + call psb_sp_all(n_row,n_row,p%av(l_pr_),nztota+nztotb,info) + if (info == 0) call psb_sp_all(n_row,n_row,p%av(u_pr_),nztota+nztotb,info) + if(info/=0) then + info=4010 + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (allocated(p%d)) then + if (size(p%d) < n_row) then + deallocate(p%d) + endif + endif + if (.not.allocated(p%d)) then + allocate(p%d(n_row),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + endif + + + if (debug) then + write(0,*) me,'Done psb_asmatbld' + call psb_barrier(ictxt) + endif + + + if (p%iprcparm(iren_) > 0) then + + ! + ! Here we allocate a full copy to hold local A and received BLK + ! + + nztota = psb_sp_get_nnzeros(a) + nztotb = psb_sp_get_nnzeros(blck) + call psb_sp_all(atmp,nztota+nztotb,info) + if(info/=0) then + info=4011 + call psb_errpush(info,name) + goto 9999 + end if + + + ! write(0,*) 'ILU_BLD ',nztota,nztotb,a%m + + call psb_sp_renum(a,desc_a,blck,p,atmp,info) + + if(info/=0) then + info=4010 + ch_err='psb_sp_renum' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + t3 = mpi_wtime() + if (debugprt) then + call psb_barrier(ictxt) + open(40+me) + call psb_csprt(40+me,atmp,head='% Local matrix') + close(40+me) + endif + if (debug) write(0,*) me,' Factoring rows ',& + &atmp%m,a%m,blck%m,atmp%ia2(atmp%m+1)-1 + + ! + ! Ok, factor the matrix. + ! + t5 = mpi_wtime() + blck%m=0 + call psb_ilu_fct(atmp,p%av(l_pr_),p%av(u_pr_),p%d,info,blck=blck) + if(info/=0) then + info=4010 + ch_err='psb_ilu_fct' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + call psb_sp_free(atmp,info) + if(info/=0) then + info=4010 + ch_err='psb_sp_free' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + + else if (p%iprcparm(iren_) == 0) then + t3 = mpi_wtime() + ! This is where we have mo renumbering, thus no need + ! for ATMP + + if (debugprt) then + open(40+me) + call psb_barrier(ictxt) + call psb_csprt(40+me,a,iv=p%desc_data%loc_to_glob,& + & head='% Local matrix') + if (p%iprcparm(p_type_)==asm_) then + call psb_csprt(40+me,blck,iv=p%desc_data%loc_to_glob,& + & irs=a%m,head='% Received rows') + endif + close(40+me) + endif + + t5= mpi_wtime() + if (debug) write(0,*) me,' Going for ilu_fct' + if (debug) call psb_barrier(ictxt) + call psb_ilu_fct(a,p%av(l_pr_),p%av(u_pr_),p%d,info,blck=blck) + if(info/=0) then + info=4010 + ch_err='psb_ilu_fct' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (debug) write(0,*) me,' Done dilu_fct' + endif + + + if (debugprt) then + ! + ! Print out the factors on file. + ! + open(80+me) + + call psb_csprt(80+me,p%av(l_pr_),head='% Local L factor') + write(80+me,*) '% Diagonal: ',p%av(l_pr_)%m + do i=1,p%av(l_pr_)%m + write(80+me,*) i,i,p%d(i) + enddo + call psb_csprt(80+me,p%av(u_pr_),head='% Local U factor') + + close(80+me) + endif + +!!$ call psb_csprt(60+me,a,head='% (A)') + + + ! ierr = MPE_Log_event( ifcte, 0, "st SIMPLE" ) + t6 = mpi_wtime() + ! + ! write(0,'(i3,1x,a,3(1x,g18.9))') me,'renum/factor time',t3-t2,t6-t5 + ! if (me==0) write(0,'(a,3(1x,g18.9))') 'renum/factor time',t3-t2,t6-t5 + + call psb_sp_free(blck,info) + if(info/=0) then + info=4010 + ch_err='psb_sp_free' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (psb_sp_getifld(psb_upd_,p%av(u_pr_),info) /= psb_upd_perm_) then + call psb_sp_trimsize(p%av(u_pr_),i1,i2,ia,info) + if (info == 0) call psb_sp_reall(p%av(u_pr_),i1,i2,ia,info) + endif + + if (psb_sp_getifld(psb_upd_,p%av(l_pr_),info) /= psb_upd_perm_) then + call psb_sp_trimsize(p%av(l_pr_),i1,i2,ia,info) + if (info == 0) call psb_sp_reall(p%av(l_pr_),i1,i2,ia,info) + endif + + + if (debug) write(0,*) me,'End of ilu_bld' + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + + +end subroutine psb_dilu_bld + + diff --git a/psb_dilu_fct.f90 b/psb_dilu_fct.f90 new file mode 100644 index 00000000..4b965702 --- /dev/null +++ b/psb_dilu_fct.f90 @@ -0,0 +1,475 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_dilu_fct(a,l,u,d,info,blck) + + ! + ! This routine copies and factors "on the fly" from A and BLCK + ! into L/D/U. + ! + ! + use psb_base_mod + implicit none + ! .. Scalar Arguments .. + integer, intent(out) :: info + ! .. Array Arguments .. + type(psb_dspmat_type),intent(in) :: a + type(psb_dspmat_type),intent(inout) :: l,u + type(psb_dspmat_type),intent(in), optional, target :: blck + real(kind(1.d0)), intent(inout) :: d(:) + ! .. Local Scalars .. + real(kind(1.d0)) :: dia, temp + integer :: i, j, jj, k, kk, l1, l2, ll, low1, low2,m,ma,err_act + + type(psb_dspmat_type), pointer :: blck_ + character(len=20) :: name, ch_err + logical, parameter :: debug=.false. + name='psb_dcsrlu' + info = 0 + call psb_erractionsave(err_act) + ! .. Executable Statements .. + ! + + if (present(blck)) then + blck_ => blck + else + allocate(blck_,stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + call psb_nullify_sp(blck_) ! Why do we need this? Who knows.... + call psb_sp_all(0,0,blck_,1,info) + if(info.ne.0) then + info=4010 + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + blck_%m=0 + endif + +!!$ write(0,*) 'ilu_fct: ',size(l%ia2),size(u%ia2),a%m,blck_%m + call psb_dilu_fctint(m,a%m,a,blck_%m,blck_,& + & d,l%aspk,l%ia1,l%ia2,u%aspk,u%ia1,u%ia2,l1,l2,info) + if(info.ne.0) then + info=4010 + ch_err='psb_dilu_fctint' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + l%infoa(1) = l1 + l%fida = 'CSR' + l%descra = 'TLU' + u%infoa(1) = l2 + u%fida = 'CSR' + u%descra = 'TUU' + l%m = m + l%k = m + u%m = m + u%k = m + if (present(blck)) then + blck_ => null() + else + call psb_sp_free(blck_,info) + if(info.ne.0) then + info=4010 + ch_err='psb_sp_free' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + deallocate(blck_) + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +contains + subroutine psb_dilu_fctint(m,ma,a,mb,b,& + & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) + implicit none + + type(psb_dspmat_type) :: a,b + integer :: m,ma,mb,l1,l2,info + integer, dimension(*) :: lia1,lia2,uia1,uia2 + real(kind(1.d0)), dimension(*) :: laspk,uaspk,d + + integer :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act + real(kind(1.d0)) :: dia,temp + integer, parameter :: nrb=16 + logical,parameter :: debug=.false. + type(psb_dspmat_type) :: trw + integer :: int_err(5) + character(len=20) :: name, ch_err + + name='psb_dilu_fctint' + if(psb_get_errstatus().ne.0) return + info=0 + call psb_erractionsave(err_act) + call psb_nullify_sp(trw) + trw%m=0 + trw%k=0 + if(debug) write(0,*)'LUINT Allocating TRW' + call psb_sp_all(trw,1,info) + if(info.ne.0) then + info=4010 + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if(debug) write(0,*)'LUINT Done Allocating TRW' + lia2(1) = 1 + uia2(1) = 1 + l1=0 + l2=0 + m = ma+mb + if(debug) write(0,*)'In DCSRLU Begin cycle',m,ma,mb + + do i = 1, ma + if(debug) write(0,*)'LUINT: Loop index ',i,ma + d(i) = 0.d0 + + ! + ! Here we take a fast shortcut if possible, otherwise + ! use spgtblk, slower but able (in principle) to handle + ! anything. + ! + if (a%fida=='CSR') then + do j = a%ia2(i), a%ia2(i+1) - 1 + k = a%ia1(j) + ! write(0,*)'KKKKK',k + if ((k < i).and.(k >= 1)) then + l1 = l1 + 1 + laspk(l1) = a%aspk(j) + lia1(l1) = k + else if (k == i) then + d(i) = a%aspk(j) + else if ((k > i).and.(k <= m)) then + l2 = l2 + 1 + uaspk(l2) = a%aspk(j) + uia1(l2) = k + end if + enddo + + else + + if ((mod(i,nrb) == 1).or.(nrb==1)) then + irb = min(ma-i+1,nrb) + call psb_sp_getblk(i,a,trw,info,lrw=i+irb-1) + if(info.ne.0) then + info=4010 + ch_err='psb_sp_getblk' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + ktrw=1 + end if + + do + if (ktrw > trw%infoa(psb_nnz_)) exit + if (trw%ia1(ktrw) > i) exit + k = trw%ia2(ktrw) + if ((k < i).and.(k >= 1)) then + l1 = l1 + 1 + laspk(l1) = trw%aspk(ktrw) + lia1(l1) = k + else if (k == i) then + d(i) = trw%aspk(ktrw) + else if ((k > i).and.(k <= m)) then + l2 = l2 + 1 + uaspk(l2) = trw%aspk(ktrw) + uia1(l2) = k + end if + ktrw = ktrw + 1 + enddo + + end if + +!!$ + + lia2(i+1) = l1 + 1 + uia2(i+1) = l2 + 1 + + dia = d(i) + do kk = lia2(i), lia2(i+1) - 1 + ! + ! compute element alo(i,k) of incomplete factorization + ! + temp = laspk(kk) + k = lia1(kk) + laspk(kk) = temp*d(k) + ! update the rest of row i using alo(i,k) + low1 = kk + 1 + low2 = uia2(i) + updateloop: do jj = uia2(k), uia2(k+1) - 1 + j = uia1(jj) + ! + if (j < i) then + ! search alo(i,*) for matching index J + do ll = low1, lia2(i+1) - 1 + l = lia1(ll) + if (l > j) then + low1 = ll + exit + else if (l == j) then + laspk(ll) = laspk(ll) - temp*uaspk(jj) + low1 = ll + 1 + cycle updateloop + end if + enddo + ! + else if (j == i) then + ! j=i update diagonal + ! write(0,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) + dia = dia - temp*uaspk(jj) + ! write(0,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) + cycle updateloop + ! + else if (j > i) then + ! search aup(i,*) for matching index j + do ll = low2, uia2(i+1) - 1 + l = uia1(ll) + if (l > j) then + low2 = ll + exit + else if (l == j) then + uaspk(ll) = uaspk(ll) - temp*uaspk(jj) + low2 = ll + 1 + cycle updateloop + end if + enddo + end if + ! + ! for milu al=1.; for ilu al=0. + ! al = 1.d0 + ! dia = dia - al*temp*aup(jj) + enddo updateloop + enddo + ! + ! + ! Non singularity + ! + if (dabs(dia) < epstol) then + ! + ! Pivot too small: unstable factorization + ! + info = 2 + int_err(1) = i + write(ch_err,'(g20.10)') dia + call psb_errpush(info,name,i_err=int_err,a_err=ch_err) + goto 9999 + else + dia = 1.d0/dia + end if + d(i) = dia + ! write(6,*)'diag(',i,')=',d(i) + ! Scale row i of upper triangle + do kk = uia2(i), uia2(i+1) - 1 + uaspk(kk) = uaspk(kk)*dia + enddo + enddo + + do i = ma+1, m + d(i) = 0.d0 + + + if (b%fida=='CSR') then + + do j = b%ia2(i-ma), b%ia2(i-ma+1) - 1 + k = b%ia1(j) + ! if (me.eq.2) write(0,*)'ecco k=',k + if ((k < i).and.(k >= 1)) then + l1 = l1 + 1 + laspk(l1) = b%aspk(j) + lia1(l1) = k + ! if(me.eq.2) write(0,*)'scrivo l' + else if (k == i) then + d(i) = b%aspk(j) + else if ((k > i).and.(k <= m)) then + l2 = l2 + 1 + uaspk(l2) = b%aspk(j) + ! write(0,*)'KKKKK',k + uia1(l2) = k + end if + enddo + + else + + if ((mod((i-ma),nrb) == 1).or.(nrb==1)) then + irb = min(m-i+1,nrb) + call psb_sp_getblk(i-ma,b,trw,info,lrw=i-ma+irb-1) + if(info.ne.0) then + info=4010 + ch_err='psb_sp_getblk' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + ktrw=1 + end if + + do + if (ktrw > trw%infoa(psb_nnz_)) exit + if (trw%ia1(ktrw) > i) exit + k = trw%ia2(ktrw) + ! write(0,*)'KKKKK',k + if ((k < i).and.(k >= 1)) then + l1 = l1 + 1 + laspk(l1) = trw%aspk(ktrw) + lia1(l1) = k + else if (k == i) then + d(i) = trw%aspk(ktrw) + else if ((k > i).and.(k <= m)) then + l2 = l2 + 1 + uaspk(l2) = trw%aspk(ktrw) + uia1(l2) = k + end if + ktrw = ktrw + 1 + enddo + + endif + + + lia2(i+1) = l1 + 1 + uia2(i+1) = l2 + 1 + + dia = d(i) + do kk = lia2(i), lia2(i+1) - 1 + ! + ! compute element alo(i,k) of incomplete factorization + ! + temp = laspk(kk) + k = lia1(kk) + laspk(kk) = temp*d(k) + ! update the rest of row i using alo(i,k) + low1 = kk + 1 + low2 = uia2(i) + updateloopb: do jj = uia2(k), uia2(k+1) - 1 + j = uia1(jj) + ! + if (j < i) then + ! search alo(i,*) for matching index J + do ll = low1, lia2(i+1) - 1 + l = lia1(ll) + if (l > j) then + low1 = ll + exit + else if (l == j) then + laspk(ll) = laspk(ll) - temp*uaspk(jj) + low1 = ll + 1 + cycle updateloopb + end if + enddo + ! + else if (j == i) then + ! j=i update diagonal + dia = dia - temp*uaspk(jj) + cycle updateloopb + ! + else if (j > i) then + ! search aup(i,*) for matching index j + do ll = low2, uia2(i+1) - 1 + l = uia1(ll) + if (l > j) then + low2 = ll + exit + else if (l == j) then + uaspk(ll) = uaspk(ll) - temp*uaspk(jj) + low2 = ll + 1 + cycle updateloopb + end if + enddo + end if + ! + ! for milu al=1.; for ilu al=0. + ! al = 1.d0 + ! dia = dia - al*temp*aup(jj) + enddo updateloopb + enddo + ! + ! + ! Non singularity + ! + if (dabs(dia) < epstol) then + ! + ! Pivot too small: unstable factorization + ! + int_err(1) = i + write(ch_err,'(g20.10)') dia + info = 2 + call psb_errpush(info,name,i_err=int_err,a_err=ch_err) + goto 9999 + else + dia = 1.d0/dia + end if + d(i) = dia + ! Scale row i of upper triangle + do kk = uia2(i), uia2(i+1) - 1 + uaspk(kk) = uaspk(kk)*dia + enddo + enddo + + call psb_sp_free(trw,info) + if(info.ne.0) then + info=4010 + ch_err='psb_sp_free' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if(debug) write(0,*)'Leaving ilu_fct' + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + end subroutine psb_dilu_fctint +end subroutine psb_dilu_fct diff --git a/psb_dmlprc_aply.f90 b/psb_dmlprc_aply.f90 new file mode 100644 index 00000000..b228130d --- /dev/null +++ b/psb_dmlprc_aply.f90 @@ -0,0 +1,782 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_dmlprc_aply(alpha,baseprecv,x,beta,y,desc_data,trans,work,info) + ! + ! Compute Y <- beta*Y + alpha*K^-1 X + ! where K is a multilevel preconditioner stored in baseprecv + ! + ! cfr.: Smith, Biorstad & Gropp + ! Domain Decomposition + ! Cambridge Univ. Press + ! + ! To each level I there corresponds a matrix A(I) and a preconditioner K(I) + ! + ! A notational difference: in the DD reference above the preconditioner for + ! a given level K(I) is written out as a sum over the subdomains + ! + ! SUM_k(R_k^T A_k R_k) + ! + ! whereas in this code the sum is implicit in the parallelization, + ! i.e. each process takes care of one subdomain, and for each level we have + ! as many subdomains as there are processes (except for the coarsest level where + ! we might have a replicated index space). Thus the sum apparently disappears + ! from our code, but only apparently, because it is implicit in the call + ! to psb_baseprc_aply. + ! + ! A bit of description of the baseprecv(:) data structure: + ! 1. Number of levels = NLEV = size(baseprecv(:)) + ! 2. baseprecv(ilev)%av(:) sparse matrices needed for the current level. + ! Includes: + ! 2.1.: baseprecv(ilev)%av(l_pr_) L factor of ILU preconditioners + ! 2.2.: baseprecv(ilev)%av(u_pr_) U factor of ILU preconditioners + ! 2.3.: baseprecv(ilev)%av(ap_nd_) Off-diagonal part of A for Jacobi sweeps + ! 2.4.: baseprecv(ilev)%av(ac_) Aggregated matrix of level ILEV + ! 2.5.: baseprecv(ilev)%av(sm_pr_t_) Smoother prolongator transpose; maps vectors + ! (ilev-1) ---> (ilev) + ! 2.6.: baseprecv(ilev)%av(sm_pr_) Smoother prolongator; maps vectors + ! (ilev) ---> (ilev-1) + ! Shouldn't we keep just one of them and handle transpose in the sparse BLAS? maybe + ! + ! 3. baseprecv(ilev)%desc_data comm descriptor for level ILEV + ! 4. baseprecv(ilev)%base_a Pointer (really a pointer!) to the base matrix + ! of the current level, i.e.: if ILEV=1 then A + ! else the aggregated matrix av(ac_); so we have + ! a unified treatment of residuals. Need this to + ! avoid passing explicitly matrix A to the + ! outer prec. routine + ! 5. baseprecv(ilev)%mlia The aggregation map from (ilev-1)-->(ilev) + ! if no smoother, it is used instead of sm_pr_ + ! 6. baseprecv(ilev)%nlaggr Number of aggregates on the various procs. + ! + + use psb_base_mod + use psb_prec_type + implicit none + + type(psb_desc_type),intent(in) :: desc_data + type(psb_dbaseprc_type), intent(in) :: baseprecv(:) + real(kind(0.d0)),intent(in) :: alpha,beta + real(kind(0.d0)),intent(inout) :: x(:), y(:) + character :: trans + real(kind(0.d0)),target :: work(:) + integer, intent(out) :: info + + + + ! Local variables + integer :: n_row,n_col + character ::diagl, diagu + integer :: ictxt,np,me,i, isz, nrg,nr2l,err_act, iptype, int_err(5) + real(kind(1.d0)) :: omega + real(kind(1.d0)) :: t1, t2, t3, t4, t5, t6, t7, mpi_wtime + logical, parameter :: debug=.false., debugprt=.false. + integer :: ismth, nlev, ilev + external mpi_wtime + character(len=20) :: name, ch_err + + type psb_mlprec_wrk_type + real(kind(1.d0)), allocatable :: tx(:), ty(:), x2l(:), y2l(:) + end type psb_mlprec_wrk_type + type(psb_mlprec_wrk_type), allocatable :: mlprec_wrk(:) + + interface psb_baseprc_aply + subroutine psb_dbaseprc_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) + use psb_base_mod + use psb_prec_type + type(psb_desc_type),intent(in) :: desc_data + type(psb_dbaseprc_type), intent(in) :: prec + real(kind(0.d0)),intent(inout) :: x(:), y(:) + real(kind(0.d0)),intent(in) :: alpha,beta + character(len=1) :: trans + real(kind(0.d0)),target :: work(:) + integer, intent(out) :: info + end subroutine psb_dbaseprc_aply + end interface + + name='psb_mlprc_aply' + info = 0 + call psb_erractionsave(err_act) + + + ictxt=desc_data%matrix_data(psb_ctxt_) + call psb_info(ictxt, me, np) + + nlev = size(baseprecv) + allocate(mlprec_wrk(nlev),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + + select case(baseprecv(2)%iprcparm(ml_type_)) + + case(no_ml_) + ! Should not really get here. + call psb_errpush(4010,name,a_err='no_ml_ in mlprc_aply?') + goto 9999 + + + case(add_ml_prec_) + + + ! + ! Additive is very simple. + ! 1. X(1) = Xext + ! 2. DO ILEV=2,NLEV + ! X(ILEV) = AV(PR_SM_T_)*X(ILEV-1) + ! Y(ILEV) = (K(ILEV)**(-1))*X(ILEV) + ! 3. DO ILEV=NLEV-1,1,-1 + ! Y(ILEV) = AV(PR_SM_)*Y(ILEV+1) + ! 4. Yext = beta*Yext + alpha*Y(1) + ! + ! Note: level numbering reversed wrt ref. DD, i.e. + ! 1..NLEV <=> (j) <-> 0 + + + call psb_baseprc_aply(alpha,baseprecv(1),x,beta,y,& + & baseprecv(1)%base_desc,trans,work,info) + if(info /=0) goto 9999 + allocate(mlprec_wrk(1)%x2l(size(x)),mlprec_wrk(1)%y2l(size(y))) + mlprec_wrk(1)%x2l(:) = x(:) + + + do ilev = 2, nlev + n_row = baseprecv(ilev-1)%base_desc%matrix_data(psb_n_row_) + n_col = baseprecv(ilev-1)%desc_data%matrix_data(psb_n_col_) + nr2l = baseprecv(ilev)%desc_data%matrix_data(psb_n_col_) + nrg = baseprecv(ilev)%desc_data%matrix_data(psb_n_row_) + allocate(mlprec_wrk(ilev)%x2l(nr2l),mlprec_wrk(ilev)%y2l(nr2l),& + & mlprec_wrk(ilev)%tx(max(n_row,n_col)),& + & mlprec_wrk(ilev)%ty(max(n_row,n_col)), stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + mlprec_wrk(ilev)%x2l(:) = dzero + mlprec_wrk(ilev)%y2l(:) = dzero + mlprec_wrk(ilev)%tx(1:n_row) = mlprec_wrk(ilev-1)%x2l(1:n_row) + mlprec_wrk(ilev)%tx(n_row+1:max(n_row,n_col)) = dzero + mlprec_wrk(ilev)%ty(:) = dzero + + ismth=baseprecv(ilev)%iprcparm(smth_kind_) + + if (ismth /= no_smth_) then + ! + ! Smoothed aggregation + ! + + + if (baseprecv(ilev)%iprcparm(glb_smth_) >0) then + call psb_halo(mlprec_wrk(ilev-1)%x2l,baseprecv(ilev-1)%base_desc,& + & info,work=work) + if(info /=0) goto 9999 + else + mlprec_wrk(ilev-1)%x2l(n_row+1:max(n_row,n_col)) = dzero + end if + + call psb_csmm(done,baseprecv(ilev)%av(sm_pr_t_),mlprec_wrk(ilev-1)%x2l,& + & dzero,mlprec_wrk(ilev)%x2l,info) + if(info /=0) goto 9999 + + else + ! + ! Raw aggregation, may take shortcut + ! + do i=1,n_row + mlprec_wrk(ilev)%x2l(baseprecv(ilev)%mlia(i)) = & + & mlprec_wrk(ilev)%x2l(baseprecv(ilev)%mlia(i)) + & + & mlprec_wrk(ilev-1)%x2l(i) + end do + + end if + + if (baseprecv(ilev)%iprcparm(coarse_mat_)==mat_repl_) Then + call psb_sum(ictxt,mlprec_wrk(ilev)%x2l(1:nrg)) + else if (baseprecv(ilev)%iprcparm(coarse_mat_) /= mat_distr_) Then + write(0,*) 'Unknown value for baseprecv(2)%iprcparm(coarse_mat_) ',& + & baseprecv(ilev)%iprcparm(coarse_mat_) + endif + + call psb_baseprc_aply(done,baseprecv(ilev),& + & mlprec_wrk(ilev)%x2l,dzero,mlprec_wrk(ilev)%y2l,& + & baseprecv(ilev)%desc_data, 'N',work,info) + + enddo + + do ilev =nlev,2,-1 + + ismth=baseprecv(ilev)%iprcparm(smth_kind_) + n_row = baseprecv(ilev-1)%base_desc%matrix_data(psb_n_row_) + n_col = baseprecv(ilev-1)%desc_data%matrix_data(psb_n_col_) + nr2l = baseprecv(ilev)%desc_data%matrix_data(psb_n_col_) + nrg = baseprecv(ilev)%desc_data%matrix_data(psb_n_row_) + + if (ismth /= no_smth_) then + + call psb_csmm(done,baseprecv(ilev)%av(sm_pr_),mlprec_wrk(ilev)%y2l,& + & done,mlprec_wrk(ilev-1)%y2l,info) + if(info /=0) goto 9999 + + else + + do i=1, n_row + mlprec_wrk(ilev-1)%y2l(i) = mlprec_wrk(ilev-1)%y2l(i) + & + & mlprec_wrk(ilev)%y2l(baseprecv(ilev)%mlia(i)) + enddo + + end if + end do + + call psb_geaxpby(alpha,mlprec_wrk(1)%y2l,done,y,baseprecv(1)%base_desc,info) + if(info /=0) goto 9999 + + + case(mult_ml_prec_) + + ! + ! Multiplicative multilevel + ! Pre/post smoothing versions. + ! + + select case(baseprecv(2)%iprcparm(smth_pos_)) + + case(post_smooth_) + + + ! + ! Post smoothing. + ! 1. X(1) = Xext + ! 2. DO ILEV=2, NLEV :: X(ILEV) = AV(PR_SM_T_,ILEV)*X(ILEV-1) + ! 3. Y(NLEV) = (K(NLEV)**(-1))*X(NLEV) + ! 4. DO ILEV=NLEV-1,1,-1 + ! Y(ILEV) = AV(PR_SM_,ILEV+1)*Y(ILEV+1) + ! Y(ILEV) = Y(ILEV) + (K(ILEV)**(-1))*(X(ILEV)-A(ILEV)*Y(ILEV)) + ! + ! 5. Yext = beta*Yext + alpha*Y(1) + ! + ! Note: level numbering reversed wrt ref. DD, i.e. + ! 1..NLEV <=> (j) <-> 0 + ! + ! Also: post smoothing in the ref. DD is only presented for NLEV=2. + ! + ! + + n_col = desc_data%matrix_data(psb_n_col_) + nr2l = baseprecv(1)%desc_data%matrix_data(psb_n_col_) + + allocate(mlprec_wrk(1)%x2l(nr2l),mlprec_wrk(1)%y2l(nr2l), & + & mlprec_wrk(1)%tx(nr2l), stat=info) + mlprec_wrk(1)%x2l(:) = dzero + mlprec_wrk(1)%y2l(:) = dzero + mlprec_wrk(1)%tx(:) = dzero + + call psb_geaxpby(done,x,dzero,mlprec_wrk(1)%tx,& + & baseprecv(1)%base_desc,info) + call psb_geaxpby(done,x,dzero,mlprec_wrk(1)%x2l,& + & baseprecv(1)%base_desc,info) + + do ilev=2, nlev + n_row = baseprecv(ilev-1)%base_desc%matrix_data(psb_n_row_) + n_col = baseprecv(ilev-1)%desc_data%matrix_data(psb_n_col_) + nr2l = baseprecv(ilev)%desc_data%matrix_data(psb_n_col_) + nrg = baseprecv(ilev)%desc_data%matrix_data(psb_n_row_) + ismth = baseprecv(ilev)%iprcparm(smth_kind_) + + allocate(mlprec_wrk(ilev)%tx(nr2l),mlprec_wrk(ilev)%y2l(nr2l),& + & mlprec_wrk(ilev)%x2l(nr2l), stat=info) + + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + mlprec_wrk(ilev)%x2l(:) = dzero + mlprec_wrk(ilev)%y2l(:) = dzero + mlprec_wrk(ilev)%tx(:) = dzero + if (ismth /= no_smth_) then + ! + ! Smoothed aggregation + ! + if (baseprecv(ilev)%iprcparm(glb_smth_) >0) then + call psb_halo(mlprec_wrk(ilev-1)%x2l,& + & baseprecv(ilev-1)%base_desc,info,work=work) + if(info /=0) goto 9999 + else + mlprec_wrk(ilev-1)%x2l(n_row+1:max(n_row,n_col)) = dzero + end if + + call psb_csmm(done,baseprecv(ilev)%av(sm_pr_t_),mlprec_wrk(ilev-1)%x2l, & + & dzero,mlprec_wrk(ilev)%x2l,info) + if(info /=0) goto 9999 + + else + ! + ! Raw aggregation, may take shortcut + ! + do i=1,n_row + mlprec_wrk(ilev)%x2l(baseprecv(ilev)%mlia(i)) = & + & mlprec_wrk(ilev)%x2l(baseprecv(ilev)%mlia(i)) + & + & mlprec_wrk(ilev-1)%x2l(i) + end do + end if + + if (baseprecv(ilev)%iprcparm(coarse_mat_)==mat_repl_) Then + call psb_sum(ictxt,mlprec_wrk(ilev)%x2l(1:nrg)) + else if (baseprecv(ilev)%iprcparm(coarse_mat_) /= mat_distr_) Then + write(0,*) 'Unknown value for baseprecv(2)%iprcparm(coarse_mat_) ',& + & baseprecv(ilev)%iprcparm(coarse_mat_) + endif + call psb_geaxpby(done,mlprec_wrk(ilev)%x2l,dzero,mlprec_wrk(ilev)%tx,& + & baseprecv(ilev)%base_desc,info) + if(info /=0) goto 9999 + + enddo + + + call psb_baseprc_aply(done,baseprecv(nlev),mlprec_wrk(nlev)%x2l, & + & dzero, mlprec_wrk(nlev)%y2l,baseprecv(nlev)%desc_data,'N',work,info) + + if(info /=0) goto 9999 + + + do ilev=nlev-1, 1, -1 + ismth = baseprecv(ilev+1)%iprcparm(smth_kind_) + if (ismth /= no_smth_) then + if (ismth == smth_omg_) & + & call psb_halo(mlprec_wrk(ilev+1)%y2l,baseprecv(ilev+1)%desc_data,& + & info,work=work) + call psb_csmm(done,baseprecv(ilev+1)%av(sm_pr_),mlprec_wrk(ilev+1)%y2l,& + & dzero,mlprec_wrk(ilev)%y2l,info) + if(info /=0) goto 9999 + + else + n_row = baseprecv(ilev)%base_desc%matrix_data(psb_n_row_) + mlprec_wrk(ilev)%y2l(:) = dzero + do i=1, n_row + mlprec_wrk(ilev)%y2l(i) = mlprec_wrk(ilev)%y2l(i) + & + & mlprec_wrk(ilev+1)%y2l(baseprecv(ilev+1)%mlia(i)) + enddo + + end if + + call psb_spmm(-done,baseprecv(ilev)%base_a,mlprec_wrk(ilev)%y2l,& + & done,mlprec_wrk(ilev)%tx,baseprecv(ilev)%base_desc,info,work=work) + + if(info /=0) goto 9999 + + call psb_baseprc_aply(done,baseprecv(ilev),mlprec_wrk(ilev)%tx,& + & done,mlprec_wrk(ilev)%y2l,baseprecv(ilev)%base_desc, trans, work,info) + + if(info /=0) goto 9999 + + enddo + + call psb_geaxpby(alpha,mlprec_wrk(1)%y2l,beta,y,baseprecv(1)%base_desc,info) + + if(info /=0) goto 9999 + + + case(pre_smooth_) + + + ! + ! Pre smoothing. + ! 1. X(1) = Xext + ! 2. Y(1) = (K(1)**(-1))*X(1) + ! 3. TX(1) = X(1) - A(1)*Y(1) + ! 4. DO ILEV=2, NLEV + ! X(ILEV) = AV(PR_SM_T_,ILEV)*TX(ILEV-1) + ! Y(ILEV) = (K(ILEV)**(-1))*X(ILEV) + ! TX(ILEV) = (X(ILEV)-A(ILEV)*Y(ILEV)) + ! 5. DO ILEV=NLEV-1,1,-1 + ! Y(ILEV) = Y(ILEV) + AV(PR_SM_,ILEV+1)*Y(ILEV+1) + ! 6. Yext = beta*Yext + alpha*Y(1) + ! + ! Note: level numbering reversed wrt ref. DD, i.e. + ! 1..NLEV <=> (j) <-> 0 + ! + ! + + n_col = desc_data%matrix_data(psb_n_col_) + nr2l = baseprecv(1)%desc_data%matrix_data(psb_n_col_) + + allocate(mlprec_wrk(1)%x2l(nr2l),mlprec_wrk(1)%y2l(nr2l), & + & mlprec_wrk(1)%tx(nr2l), stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + mlprec_wrk(1)%y2l(:) = dzero + mlprec_wrk(1)%x2l(:) = x + + call psb_baseprc_aply(done,baseprecv(1),mlprec_wrk(1)%x2l,& + & dzero,mlprec_wrk(1)%y2l,& + & baseprecv(1)%base_desc,& + & trans,work,info) + + if(info /=0) goto 9999 + + mlprec_wrk(1)%tx = mlprec_wrk(1)%x2l + + call psb_spmm(-done,baseprecv(1)%base_a,mlprec_wrk(1)%y2l,& + & done,mlprec_wrk(1)%tx,baseprecv(1)%base_desc,info,work=work) + if(info /=0) goto 9999 + + do ilev = 2, nlev + n_row = baseprecv(ilev-1)%base_desc%matrix_data(psb_n_row_) + n_col = baseprecv(ilev-1)%desc_data%matrix_data(psb_n_col_) + nr2l = baseprecv(ilev)%desc_data%matrix_data(psb_n_col_) + nrg = baseprecv(ilev)%desc_data%matrix_data(psb_n_row_) + ismth = baseprecv(ilev)%iprcparm(smth_kind_) + allocate(mlprec_wrk(ilev)%tx(nr2l),mlprec_wrk(ilev)%y2l(nr2l),& + & mlprec_wrk(ilev)%x2l(nr2l), stat=info) + + + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + mlprec_wrk(ilev)%x2l(:) = dzero + mlprec_wrk(ilev)%y2l(:) = dzero + mlprec_wrk(ilev)%tx(:) = dzero + + + if (ismth /= no_smth_) then + ! + !Smoothed Aggregation + ! + if (baseprecv(ilev)%iprcparm(glb_smth_) >0) then + + call psb_halo(mlprec_wrk(ilev-1)%tx,baseprecv(ilev-1)%base_desc,& + & info,work=work) + if(info /=0) goto 9999 + else + mlprec_wrk(ilev-1)%tx(n_row+1:max(n_row,n_col)) = dzero + end if + + call psb_csmm(done,baseprecv(ilev)%av(sm_pr_t_),mlprec_wrk(ilev-1)%tx,dzero,& + & mlprec_wrk(ilev)%x2l,info) + if(info /=0) goto 9999 + + else + ! + ! Raw aggregation, may take shortcuts + ! + mlprec_wrk(ilev)%x2l = dzero + do i=1,n_row + mlprec_wrk(ilev)%x2l(baseprecv(ilev)%mlia(i)) = & + & mlprec_wrk(ilev)%x2l(baseprecv(ilev)%mlia(i)) + & + & mlprec_wrk(ilev-1)%tx(i) + end do + end if + + if (baseprecv(ilev)%iprcparm(coarse_mat_)==mat_repl_) then + call psb_sum(ictxt,mlprec_wrk(ilev)%x2l(1:nrg)) + else if (baseprecv(ilev)%iprcparm(coarse_mat_) /= mat_distr_) then + write(0,*) 'Unknown value for baseprecv(2)%iprcparm(coarse_mat_) ',& + & baseprecv(ilev)%iprcparm(coarse_mat_) + endif + + + call psb_baseprc_aply(done,baseprecv(ilev),mlprec_wrk(ilev)%x2l,& + & dzero,mlprec_wrk(ilev)%y2l,baseprecv(ilev)%desc_data, 'N',work,info) + + if(info /=0) goto 9999 + + if(ilev < nlev) then + mlprec_wrk(ilev)%tx = mlprec_wrk(ilev)%x2l + call psb_spmm(-done,baseprecv(ilev)%base_a,mlprec_wrk(ilev)%y2l,& + & done,mlprec_wrk(ilev)%tx,baseprecv(ilev)%base_desc,info,work=work) + if(info /=0) goto 9999 + endif + + enddo + + do ilev = nlev-1, 1, -1 + + ismth=baseprecv(ilev+1)%iprcparm(smth_kind_) + + if (ismth /= no_smth_) then + + if (ismth == smth_omg_) & + & call psb_halo(mlprec_wrk(ilev+1)%y2l,& + & baseprecv(ilev+1)%desc_data,info,work=work) + call psb_csmm(done,baseprecv(ilev+1)%av(sm_pr_),mlprec_wrk(ilev+1)%y2l,& + & done,mlprec_wrk(ilev)%y2l,info) + + if(info /=0) goto 9999 + + else + + n_row = baseprecv(ilev+1)%base_desc%matrix_data(psb_n_row_) + do i=1, n_row + mlprec_wrk(ilev)%y2l(i) = mlprec_wrk(ilev)%y2l(i) + & + & mlprec_wrk(ilev+1)%y2l(baseprecv(ilev+1)%mlia(i)) + enddo + + end if + + enddo + + call psb_geaxpby(alpha,mlprec_wrk(1)%y2l,beta,y,& + & baseprecv(1)%base_desc,info) + + if(info /=0) goto 9999 + + + + case(smooth_both_) + + ! + ! Symmetrized smoothing. + ! 1. X(1) = Xext + ! 2. Y(1) = (K(1)**(-1))*X(1) + ! 3. TX(1) = X(1) - A(1)*Y(1) + ! 4. DO ILEV=2, NLEV + ! X(ILEV) = AV(PR_SM_T_,ILEV)*TX(ILEV-1) + ! Y(ILEV) = (K(ILEV)**(-1))*X(ILEV) + ! TX(ILEV) = (X(ILEV)-A(ILEV)*Y(ILEV)) + ! 5. DO ILEV=NLEV-1,1,-1 + ! Y(ILEV) = Y(ILEV) + AV(PR_SM_,ILEV+1)*Y(ILEV+1) + ! Y(ILEV) = Y(ILEV) + (K(ILEV)**(-1))*(X(ILEV)-A(ILEV)*Y(ILEV)) + ! 6. Yext = beta*Yext + alpha*Y(1) + ! + ! Note: level numbering reversed wrt ref. DD, i.e. + ! 1..NLEV <=> (j) <-> 0 + ! + ! + n_col = desc_data%matrix_data(psb_n_col_) + nr2l = baseprecv(1)%desc_data%matrix_data(psb_n_col_) + + allocate(mlprec_wrk(1)%x2l(nr2l),mlprec_wrk(1)%y2l(nr2l), & + & mlprec_wrk(1)%ty(nr2l), mlprec_wrk(1)%tx(nr2l), stat=info) + + mlprec_wrk(1)%x2l(:) = dzero + mlprec_wrk(1)%y2l(:) = dzero + mlprec_wrk(1)%tx(:) = dzero + mlprec_wrk(1)%ty(:) = dzero + + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + call psb_geaxpby(done,x,dzero,mlprec_wrk(1)%x2l,& + & baseprecv(1)%base_desc,info) + call psb_geaxpby(done,x,dzero,mlprec_wrk(1)%tx,& + & baseprecv(1)%base_desc,info) + + call psb_baseprc_aply(done,baseprecv(1),mlprec_wrk(1)%x2l,& + & dzero,mlprec_wrk(1)%y2l,& + & baseprecv(1)%base_desc,& + & trans,work,info) + + if(info /=0) goto 9999 + + mlprec_wrk(1)%ty = mlprec_wrk(1)%x2l + + call psb_spmm(-done,baseprecv(1)%base_a,mlprec_wrk(1)%y2l,& + & done,mlprec_wrk(1)%ty,baseprecv(1)%base_desc,info,work=work) + if(info /=0) goto 9999 + + do ilev = 2, nlev + n_row = baseprecv(ilev-1)%base_desc%matrix_data(psb_n_row_) + n_col = baseprecv(ilev-1)%desc_data%matrix_data(psb_n_col_) + nr2l = baseprecv(ilev)%desc_data%matrix_data(psb_n_col_) + nrg = baseprecv(ilev)%desc_data%matrix_data(psb_n_row_) + ismth=baseprecv(ilev)%iprcparm(smth_kind_) + allocate(mlprec_wrk(ilev)%ty(nr2l),mlprec_wrk(ilev)%y2l(nr2l),& + & mlprec_wrk(ilev)%x2l(nr2l), stat=info) + + mlprec_wrk(ilev)%x2l(:) = dzero + mlprec_wrk(ilev)%y2l(:) = dzero + mlprec_wrk(ilev)%tx(:) = dzero + mlprec_wrk(ilev)%ty(:) = dzero + + + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + + if (ismth /= no_smth_) then + ! + !Smoothed Aggregation + ! + if (baseprecv(ilev)%iprcparm(glb_smth_) >0) then + + call psb_halo(mlprec_wrk(ilev-1)%ty,baseprecv(ilev-1)%base_desc,& + & info,work=work) + if(info /=0) goto 9999 + else + mlprec_wrk(ilev-1)%ty(n_row+1:max(n_row,n_col)) = dzero + end if + + call psb_csmm(done,baseprecv(ilev)%av(sm_pr_t_),mlprec_wrk(ilev-1)%ty,dzero,& + & mlprec_wrk(ilev)%x2l,info) + if(info /=0) goto 9999 + + else + ! + ! Raw aggregation, may take shortcuts + ! + mlprec_wrk(ilev)%x2l = dzero + do i=1,n_row + mlprec_wrk(ilev)%x2l(baseprecv(ilev)%mlia(i)) = & + & mlprec_wrk(ilev)%x2l(baseprecv(ilev)%mlia(i)) + & + & mlprec_wrk(ilev-1)%ty(i) + end do + end if + + if (baseprecv(ilev)%iprcparm(coarse_mat_)==mat_repl_) then + call psb_sum(ictxt,mlprec_wrk(ilev)%x2l(1:nrg)) + else if (baseprecv(ilev)%iprcparm(coarse_mat_) /= mat_distr_) then + write(0,*) 'Unknown value for baseprecv(2)%iprcparm(coarse_mat_) ',& + & baseprecv(ilev)%iprcparm(coarse_mat_) + endif + + call psb_geaxpby(done,mlprec_wrk(ilev)%x2l,dzero,mlprec_wrk(ilev)%tx,& + & baseprecv(ilev)%base_desc,info) + if(info /=0) goto 9999 + + call psb_baseprc_aply(done,baseprecv(ilev),mlprec_wrk(ilev)%x2l,& + & dzero,mlprec_wrk(ilev)%y2l,baseprecv(ilev)%desc_data, 'N',work,info) + + if(info /=0) goto 9999 + + if(ilev < nlev) then + mlprec_wrk(ilev)%ty = mlprec_wrk(ilev)%x2l + call psb_spmm(-done,baseprecv(ilev)%base_a,mlprec_wrk(ilev)%y2l,& + & done,mlprec_wrk(ilev)%ty,baseprecv(ilev)%base_desc,info,work=work) + if(info /=0) goto 9999 + endif + + enddo + + + do ilev=nlev-1, 1, -1 + + ismth=baseprecv(ilev+1)%iprcparm(smth_kind_) + if (ismth /= no_smth_) then + if (ismth == smth_omg_) & + & call psb_halo(mlprec_wrk(ilev+1)%y2l,baseprecv(ilev+1)%desc_data,& + & info,work=work) + call psb_csmm(done,baseprecv(ilev+1)%av(sm_pr_),mlprec_wrk(ilev+1)%y2l,& + & done,mlprec_wrk(ilev)%y2l,info) + if(info /=0) goto 9999 + + else + n_row = baseprecv(ilev)%base_desc%matrix_data(psb_n_row_) + do i=1, n_row + mlprec_wrk(ilev)%y2l(i) = mlprec_wrk(ilev)%y2l(i) + & + & mlprec_wrk(ilev+1)%y2l(baseprecv(ilev+1)%mlia(i)) + enddo + + end if + + call psb_spmm(-done,baseprecv(ilev)%base_a,mlprec_wrk(ilev)%y2l,& + & done,mlprec_wrk(ilev)%tx,baseprecv(ilev)%base_desc,info,work=work) + + if(info /=0) goto 9999 + + call psb_baseprc_aply(done,baseprecv(ilev),mlprec_wrk(ilev)%tx,& + & done,mlprec_wrk(ilev)%y2l,baseprecv(ilev)%base_desc, trans, work,info) + + if(info /=0) goto 9999 + + enddo + + call psb_geaxpby(alpha,mlprec_wrk(1)%y2l,beta,y,& + & baseprecv(1)%base_desc,info) + + if(info /=0) goto 9999 + + case default + + call psb_errpush(4013,name,a_err='wrong smooth_pos',& + & i_Err=(/baseprecv(2)%iprcparm(smth_pos_),0,0,0,0/)) + goto 9999 + + end select + + case default + call psb_errpush(4013,name,a_err='wrong mltype',& + & i_Err=(/baseprecv(2)%iprcparm(ml_type_),0,0,0,0/)) + goto 9999 + + end select + + deallocate(mlprec_wrk) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_errpush(info,name) + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +!!$contains +!!$ subroutine mlprec_wrk_free(wrk) +!!$ type(psb_mlprec_wrk_type) :: wrk(:) +!!$ ! This will not be needed when we have allocatables, as +!!$ ! it is sufficient to deallocate the container, and +!!$ ! the compiler is supposed to recursively deallocate the +!!$ ! various components. +!!$ integer i +!!$ +!!$ do i=1, size(wrk) +!!$ if (associated(wrk(i)%tx)) deallocate(wrk(i)%tx) +!!$ if (associated(wrk(i)%ty)) deallocate(wrk(i)%ty) +!!$ if (associated(wrk(i)%x2l)) deallocate(wrk(i)%x2l) +!!$ if (associated(wrk(i)%y2l)) deallocate(wrk(i)%y2l) +!!$ if (associated(wrk(i)%b2l)) deallocate(wrk(i)%b2l) +!!$ if (associated(wrk(i)%tty)) deallocate(wrk(i)%tty) +!!$ end do +!!$ end subroutine mlprec_wrk_free + +end subroutine psb_dmlprc_aply + diff --git a/psb_dmlprc_bld.f90 b/psb_dmlprc_bld.f90 new file mode 100644 index 00000000..56ca6bd4 --- /dev/null +++ b/psb_dmlprc_bld.f90 @@ -0,0 +1,198 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_dmlprc_bld(a,desc_a,p,info) + + use psb_base_mod + use psb_prec_type + implicit none + + type(psb_dspmat_type), intent(in), target :: a + type(psb_desc_type), intent(in), target :: desc_a + type(psb_dbaseprc_type), intent(inout),target :: p + integer, intent(out) :: info + + type(psb_desc_type) :: desc_ac + + integer :: i, nrg, nzg, err_act,k + character(len=20) :: name, ch_err + logical, parameter :: debug=.false. + type(psb_dspmat_type) :: ac + + interface psb_baseprc_bld + subroutine psb_dbaseprc_bld(a,desc_a,p,info,upd) + use psb_base_mod + use psb_prec_type + type(psb_dspmat_type), target :: a + type(psb_desc_type), intent(in) :: desc_a + type(psb_dbaseprc_type),intent(inout) :: p + integer, intent(out) :: info + character, intent(in), optional :: upd + end subroutine psb_dbaseprc_bld + end interface + + interface psb_genaggrmap + subroutine psb_dgenaggrmap(aggr_type,a,desc_a,nlaggr,ilaggr,info) + use psb_base_mod + use psb_prec_type + implicit none + integer, intent(in) :: aggr_type + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + integer, allocatable :: ilaggr(:),nlaggr(:) + integer, intent(out) :: info + end subroutine psb_dgenaggrmap + end interface + + interface psb_bldaggrmat + subroutine psb_dbldaggrmat(a,desc_a,ac,desc_ac,p,info) + use psb_base_mod + use psb_prec_type + type(psb_dspmat_type), intent(in), target :: a + type(psb_desc_type), intent(in) :: desc_a + type(psb_dspmat_type), intent(out),target :: ac + type(psb_desc_type), intent(inout) :: desc_ac + type(psb_dbaseprc_type), intent(inout), target :: p + integer, intent(out) :: info + end subroutine psb_dbldaggrmat + end interface + + integer :: ictxt, np, me + + name='psb_mlprec_bld' + if (psb_get_errstatus().ne.0) return + info = 0 + ictxt = psb_cd_get_context(desc_a) + call psb_info(ictxt,me,np) + call psb_erractionsave(err_act) + call psb_nullify_sp(ac) + + + if (.not.allocated(p%iprcparm)) then + info = 2222 + call psb_errpush(info,name) + goto 9999 + endif + call psb_check_def(p%iprcparm(ml_type_),'Multilevel type',& + & mult_ml_prec_,is_legal_ml_type) + call psb_check_def(p%iprcparm(aggr_alg_),'aggregation',& + & loc_aggr_,is_legal_ml_aggr_kind) + call psb_check_def(p%iprcparm(smth_kind_),'Smoother kind',& + & smth_omg_,is_legal_ml_smth_kind) + call psb_check_def(p%iprcparm(coarse_mat_),'Coarse matrix',& + & mat_distr_,is_legal_ml_coarse_mat) + call psb_check_def(p%iprcparm(smth_pos_),'smooth_pos',& + & pre_smooth_,is_legal_ml_smooth_pos) + + +!!$ nullify(p%desc_data) + select case(p%iprcparm(f_type_)) + case(f_ilu_n_) + call psb_check_def(p%iprcparm(ilu_fill_in_),'Level',0,is_legal_ml_lev) + case(f_ilu_e_) + call psb_check_def(p%dprcparm(fact_eps_),'Eps',dzero,is_legal_ml_eps) + end select + call psb_check_def(p%dprcparm(smooth_omega_),'omega',dzero,is_legal_omega) + call psb_check_def(p%iprcparm(jac_sweeps_),'Jacobi sweeps',& + & 1,is_legal_jac_sweeps) + + + ! Currently this is ignored by gen_aggrmap, but it could be + ! changed in the future. Need to package nlaggr & mlia in a + ! private data structure? + call psb_genaggrmap(p%iprcparm(aggr_alg_),a,desc_a,p%nlaggr,p%mlia,info) + if(info /= 0) then + info=4010 + ch_err='psb_gen_aggrmap' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (debug) write(0,*) 'Out from genaggrmap',p%nlaggr + + call psb_nullify_desc(desc_ac) + call psb_bldaggrmat(a,desc_a,ac,desc_ac,p,info) + if(info /= 0) then + info=4010 + ch_err='psb_bld_aggrmat' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (debug) write(0,*) 'Out from bldaggrmat',desc_ac%matrix_data(:) + + + + call psb_baseprc_bld(ac,desc_ac,p,info) + if (debug) write(0,*) 'Out from baseprcbld',info + if(info /= 0) then + info=4010 + ch_err='psb_baseprc_bld' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + + ! + ! We have used a separate ac because: + ! 1. We want to reuse the same routines psb_ilu_bld etc. + ! 2. We do NOT want to pass an argument twice to them + ! p%av(ac_) and p, as this would violate the Fortran standard + ! Hence a separate AC and a TRANSFER function at the end. + ! + call psb_sp_transfer(ac,p%av(ac_),info) + p%base_a => p%av(ac_) + call psb_cdtransfer(desc_ac,p%desc_ac,info) + + if (info /= 0) then + info=4010 + ch_err='psb_cdtransfer' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + p%base_desc => p%desc_ac + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + Return + +end subroutine psb_dmlprc_bld diff --git a/psb_dprc_aply.f90 b/psb_dprc_aply.f90 new file mode 100644 index 00000000..0ec31ae6 --- /dev/null +++ b/psb_dprc_aply.f90 @@ -0,0 +1,250 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_dprc_aply(prec,x,y,desc_data,info,trans, work) + + use psb_base_mod + use psb_prec_type + implicit none + + type(psb_desc_type),intent(in) :: desc_data + type(psb_dprec_type), intent(in) :: prec + real(kind(0.d0)),intent(inout) :: x(:), y(:) + integer, intent(out) :: info + character(len=1), optional :: trans + real(kind(0.d0)), optional, target :: work(:) + + ! Local variables + character :: trans_ + real(kind(1.d0)), pointer :: work_(:) + integer :: ictxt,np,me,err_act + logical,parameter :: debug=.false., debugprt=.false. + external mpi_wtime + character(len=20) :: name + + interface psb_baseprc_aply + subroutine psb_dbaseprc_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) + use psb_base_mod + use psb_prec_type + type(psb_desc_type),intent(in) :: desc_data + type(psb_dbaseprc_type), intent(in) :: prec + real(kind(0.d0)),intent(inout) :: x(:), y(:) + real(kind(0.d0)),intent(in) :: alpha,beta + character(len=1) :: trans + real(kind(0.d0)),target :: work(:) + integer, intent(out) :: info + end subroutine psb_dbaseprc_aply + end interface + + interface psb_mlprc_aply + subroutine psb_dmlprc_aply(alpha,baseprecv,x,beta,y,desc_data,trans,work,info) + use psb_base_mod + use psb_prec_type + type(psb_desc_type),intent(in) :: desc_data + type(psb_dbaseprc_type), intent(in) :: baseprecv(:) + real(kind(0.d0)),intent(in) :: alpha,beta + real(kind(0.d0)),intent(inout) :: x(:), y(:) + character :: trans + real(kind(0.d0)),target :: work(:) + integer, intent(out) :: info + end subroutine psb_dmlprc_aply + end interface + + name='psb_dprc_aply' + info = 0 + call psb_erractionsave(err_act) + + ictxt=desc_data%matrix_data(psb_ctxt_) + call psb_info(ictxt, me, np) + + if (present(trans)) then + trans_=trans + else + trans_='N' + end if + + if (present(work)) then + work_ => work + else + allocate(work_(4*desc_data%matrix_data(psb_n_col_)),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + end if + + if (.not.(allocated(prec%baseprecv))) then + write(0,*) 'Inconsistent preconditioner: neither SMTH nor BASE?' + end if + if (size(prec%baseprecv) >1) then + if (debug) write(0,*) 'Into mlprc_aply',size(x),size(y) + call psb_mlprc_aply(done,prec%baseprecv,x,dzero,y,desc_data,trans_,work_,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_dmlprc_aply') + goto 9999 + end if + + else if (size(prec%baseprecv) == 1) then + call psb_baseprc_aply(done,prec%baseprecv(1),x,dzero,y,desc_data,trans_, work_,info) + else + write(0,*) 'Inconsistent preconditioner: size of baseprecv???' + endif + + if (present(work)) then + else + deallocate(work_) + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_dprc_aply + + +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_dprc_aply1(prec,x,desc_data,info,trans) + + use psb_base_mod + use psb_prec_type + implicit none + + type(psb_desc_type),intent(in) :: desc_data + type(psb_dprec_type), intent(in) :: prec + real(kind(0.d0)),intent(inout) :: x(:) + integer, intent(out) :: info + character(len=1), optional :: trans + logical,parameter :: debug=.false., debugprt=.false. + + interface + subroutine psb_dprc_aply(prec,x,y,desc_data,info,trans, work) + use psb_base_mod + use psb_prec_type + implicit none + + type(psb_desc_type),intent(in) :: desc_data + type(psb_dprec_type), intent(in) :: prec + real(kind(0.d0)),intent(inout) :: x(:), y(:) + integer, intent(out) :: info + character(len=1), optional :: trans + real(kind(0.d0)), optional, target :: work(:) + end subroutine psb_dprc_aply + end interface + + ! Local variables + character :: trans_ + integer :: ictxt,np,me,i, err_act + real(kind(1.d0)), pointer :: WW(:), w1(:) + character(len=20) :: name + name='psb_dprec1' + info = 0 + call psb_erractionsave(err_act) + + + ictxt=desc_data%matrix_data(psb_ctxt_) + call psb_info(ictxt, me, np) + if (present(trans)) then + trans_=trans + else + trans_='N' + end if + + allocate(ww(size(x)),w1(size(x)),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + if (debug) write(0,*) 'Prc_aply1 Size(x) ',size(x), size(ww),size(w1) + call psb_dprc_aply(prec,x,ww,desc_data,info,trans_,work=w1) + if(info /=0) goto 9999 + x(:) = ww(:) + deallocate(ww,W1) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_errpush(info,name) + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return +end subroutine psb_dprc_aply1 diff --git a/psb_dprecbld.f90 b/psb_dprecbld.f90 new file mode 100644 index 00000000..53debd43 --- /dev/null +++ b/psb_dprecbld.f90 @@ -0,0 +1,170 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_dprecbld(a,desc_a,p,info,upd) + + use psb_base_mod + use psb_prec_type + use psb_prec_mod + Implicit None + + type(psb_dspmat_type), target :: a + type(psb_desc_type), intent(in), target :: desc_a + type(psb_dprec_type),intent(inout) :: p + integer, intent(out) :: info + character, intent(in), optional :: upd + + ! Local scalars + Integer :: err,i,j,k,ictxt, me,np,lw, err_act + integer :: int_err(5) + character :: iupd + + logical, parameter :: debug=.false. + integer,parameter :: iroot=0,iout=60,ilout=40 + character(len=20) :: name, ch_err + + if(psb_get_errstatus().ne.0) return + info=0 + err=0 + call psb_erractionsave(err_act) + name = 'psb_precbld' + + if (debug) write(0,*) 'Entering precbld',P%prec,desc_a%matrix_data(:) + info = 0 + int_err(1) = 0 + ictxt = psb_cd_get_context(desc_a) + + if (debug) write(0,*) 'Preconditioner psb_info' + call psb_info(ictxt, me, np) + + if (present(upd)) then + if (debug) write(0,*) 'UPD ', upd + if ((upd.eq.'F').or.(upd.eq.'T')) then + iupd=upd + else + iupd='F' + endif + else + iupd='F' + endif + + if (.not.allocated(p%baseprecv)) then + !! Error 1: should call precset + info=4010 + ch_err='unallocated bpv' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + ! + ! Should add check to ensure all procs have the same... + ! + ! ALso should define symbolic names for the preconditioners. + ! + if (size(p%baseprecv) >= 1) then + call init_baseprc_av(p%baseprecv(1),info) + if (info /= 0) then + info=4010 + ch_err='allocate' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + endif + + call psb_baseprc_bld(a,desc_a,p%baseprecv(1),info,iupd) + + else + info=4010 + ch_err='size bpv' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + + endif + + if (size(p%baseprecv) > 1) then + + do i=2, size(p%baseprecv) + + call init_baseprc_av(p%baseprecv(i),info) + if (info /= 0) then + info=4010 + ch_err='allocate' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + endif + + call psb_mlprc_bld(p%baseprecv(i-1)%base_a,p%baseprecv(i-1)%base_desc,& + & p%baseprecv(i),info) + if (info /= 0) then + info=4010 + call psb_errpush(info,name) + goto 9999 + endif + if (debug) then + write(0,*) 'Return from ',i-1,' call to mlprcbld ',info + endif + + end do + + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +contains + + subroutine init_baseprc_av(p,info) + type(psb_dbaseprc_type), intent(inout) :: p + integer :: info + if (allocated(p%av)) then + ! Have not decided what to do yet + end if + allocate(p%av(max_avsz),stat=info) +!!$ if (info /= 0) return + do k=1,size(p%av) + call psb_nullify_sp(p%av(k)) + end do + + end subroutine init_baseprc_av + +end subroutine psb_dprecbld + diff --git a/psb_dprecfree.f90 b/psb_dprecfree.f90 new file mode 100644 index 00000000..e1dd3264 --- /dev/null +++ b/psb_dprecfree.f90 @@ -0,0 +1,72 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_dprecfree(p,info) + use psb_base_mod + use psb_prec_type + implicit none + type(psb_dprec_type), intent(inout) :: p + integer, intent(out) :: info + + !...locals.... + integer :: ictxt,me,np,err_act,i + character(len=20) :: name + + if(psb_get_errstatus().ne.0) return + info=0 + name = 'psdprecfree' + call psb_erractionsave(err_act) + + me=-1 + + if (allocated(p%baseprecv)) then + do i=1,size(p%baseprecv) + call psb_base_precfree(p%baseprecv(i),info) + end do + deallocate(p%baseprecv) + end if + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_dprecfree diff --git a/psb_dprecset.f90 b/psb_dprecset.f90 new file mode 100644 index 00000000..58cc4cd2 --- /dev/null +++ b/psb_dprecset.f90 @@ -0,0 +1,187 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_dprecset(p,ptype,info,iv,rs,rv,ilev,nlev) + + use psb_base_mod + use psb_prec_type + implicit none + type(psb_dprec_type), intent(inout) :: p + character(len=*), intent(in) :: ptype + integer, intent(out) :: info + integer, optional, intent(in) :: iv(:) + integer, optional, intent(in) :: nlev,ilev + real(kind(1.d0)), optional, intent(in) :: rs + real(kind(1.d0)), optional, intent(in) :: rv(:) + + character(len=len(ptype)) :: typeup + integer :: isz, err, nlev_, ilev_, i + + info = 0 + + if (present(ilev)) then + ilev_ = max(1, ilev) + else + ilev_ = 1 + end if + if (present(nlev)) then + if (allocated(p%baseprecv)) then + write(0,*) 'Warning: NLEV is ignored when P is already allocated' + end if + nlev_ = max(1, nlev) + else + nlev_ = 1 + end if + + if (.not.allocated(p%baseprecv)) then + allocate(p%baseprecv(nlev_),stat=err) + else + nlev_ = size(p%baseprecv) + endif + + if ((ilev_<1).or.(ilev_ > nlev_)) then + write(0,*) 'PRECSET ERRROR: ilev out of bounds' + info = -1 + return + endif + + call psb_realloc(ifpsz,p%baseprecv(ilev_)%iprcparm,info) + if (info == 0) call psb_realloc(dfpsz,p%baseprecv(ilev_)%dprcparm,info) + if (info /= 0) return + p%baseprecv(ilev_)%iprcparm(:) = 0 + + select case(toupper(ptype(1:len_trim(ptype)))) + case ('NONE','NOPREC') + p%baseprecv(ilev_)%iprcparm(:) = 0 + p%baseprecv(ilev_)%iprcparm(p_type_) = noprec_ + p%baseprecv(ilev_)%iprcparm(f_type_) = f_none_ + p%baseprecv(ilev_)%iprcparm(restr_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(prol_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(iren_) = 0 + p%baseprecv(ilev_)%iprcparm(n_ovr_) = 0 + p%baseprecv(ilev_)%iprcparm(jac_sweeps_) = 1 + + case ('DIAG','DIAGSC') + p%baseprecv(ilev_)%iprcparm(:) = 0 + p%baseprecv(ilev_)%iprcparm(p_type_) = diagsc_ + p%baseprecv(ilev_)%iprcparm(f_type_) = f_none_ + p%baseprecv(ilev_)%iprcparm(restr_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(prol_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(iren_) = 0 + p%baseprecv(ilev_)%iprcparm(n_ovr_) = 0 + p%baseprecv(ilev_)%iprcparm(jac_sweeps_) = 1 + + case ('BJA','ILU') + p%baseprecv(ilev_)%iprcparm(:) = 0 + p%baseprecv(ilev_)%iprcparm(p_type_) = bja_ + p%baseprecv(ilev_)%iprcparm(f_type_) = f_ilu_n_ + p%baseprecv(ilev_)%iprcparm(restr_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(prol_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(iren_) = 0 + p%baseprecv(ilev_)%iprcparm(n_ovr_) = 0 + p%baseprecv(ilev_)%iprcparm(ilu_fill_in_) = 0 + p%baseprecv(ilev_)%iprcparm(jac_sweeps_) = 1 + + case ('ASM','AS') + p%baseprecv(ilev_)%iprcparm(:) = 0 + ! Defaults first + p%baseprecv(ilev_)%iprcparm(p_type_) = asm_ + p%baseprecv(ilev_)%iprcparm(f_type_) = f_ilu_n_ + p%baseprecv(ilev_)%iprcparm(restr_) = psb_halo_ + p%baseprecv(ilev_)%iprcparm(prol_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(iren_) = 0 + p%baseprecv(ilev_)%iprcparm(n_ovr_) = 1 + p%baseprecv(ilev_)%iprcparm(ilu_fill_in_) = 0 + p%baseprecv(ilev_)%iprcparm(jac_sweeps_) = 1 + + if (present(iv)) then + isz = size(iv) + if (isz >= 1) p%baseprecv(ilev_)%iprcparm(n_ovr_) = iv(1) + if (isz >= 2) p%baseprecv(ilev_)%iprcparm(restr_) = iv(2) + if (isz >= 3) p%baseprecv(ilev_)%iprcparm(prol_) = iv(3) + if (isz >= 4) p%baseprecv(ilev_)%iprcparm(f_type_) = iv(4) + ! Do not consider renum for the time being. +!!$ if (isz >= 5) p%baseprecv(ilev_)%iprcparm(iren_) = iv(5) + end if + + + case ('ML', '2L', '2LEV') + + + p%baseprecv(ilev_)%iprcparm(:) = 0 + p%baseprecv(ilev_)%iprcparm(p_type_) = bja_ + p%baseprecv(ilev_)%iprcparm(restr_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(prol_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(iren_) = 0 + p%baseprecv(ilev_)%iprcparm(n_ovr_) = 0 + p%baseprecv(ilev_)%iprcparm(ml_type_) = mult_ml_prec_ + p%baseprecv(ilev_)%iprcparm(aggr_alg_) = loc_aggr_ + p%baseprecv(ilev_)%iprcparm(smth_kind_) = smth_omg_ + p%baseprecv(ilev_)%iprcparm(coarse_mat_) = mat_distr_ + p%baseprecv(ilev_)%iprcparm(smth_pos_) = post_smooth_ + p%baseprecv(ilev_)%iprcparm(glb_smth_) = 1 + p%baseprecv(ilev_)%iprcparm(om_choice_) = lib_choice_ + p%baseprecv(ilev_)%iprcparm(f_type_) = f_ilu_n_ + p%baseprecv(ilev_)%iprcparm(ilu_fill_in_) = 0 + p%baseprecv(ilev_)%dprcparm(smooth_omega_) = 4.d0/3.d0 + p%baseprecv(ilev_)%iprcparm(jac_sweeps_) = 1 + + if (present(iv)) then + isz = size(iv) + if (isz >= 1) p%baseprecv(ilev_)%iprcparm(ml_type_) = iv(1) + if (isz >= 2) p%baseprecv(ilev_)%iprcparm(aggr_alg_) = iv(2) + if (isz >= 3) p%baseprecv(ilev_)%iprcparm(coarse_mat_) = iv(3) + if (isz >= 4) p%baseprecv(ilev_)%iprcparm(smth_pos_) = iv(4) + if (isz >= 5) p%baseprecv(ilev_)%iprcparm(f_type_) = iv(5) + if (isz >= 6) p%baseprecv(ilev_)%iprcparm(jac_sweeps_) = iv(6) + if (isz >= 7) p%baseprecv(ilev_)%iprcparm(smth_kind_) = iv(7) + end if + + if (present(rs)) then + p%baseprecv(ilev_)%iprcparm(om_choice_) = user_choice_ + p%baseprecv(ilev_)%dprcparm(smooth_omega_) = rs + end if + + + case default + write(0,*) 'Unknown preconditioner type request "',ptype,'"' + err = 2 + + end select + + info = err + +end subroutine psb_dprecset diff --git a/psb_dslu_bld.f90 b/psb_dslu_bld.f90 new file mode 100644 index 00000000..b1d824fd --- /dev/null +++ b/psb_dslu_bld.f90 @@ -0,0 +1,206 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_dslu_bld(a,desc_a,p,info) + use psb_base_mod + use psb_prec_type + + implicit none + + type(psb_dspmat_type), intent(inout) :: a + type(psb_desc_type), intent(in) :: desc_a + type(psb_dbaseprc_type), intent(inout) :: p + integer, intent(out) :: info + + + type(psb_dspmat_type) :: blck, atmp + character(len=5) :: fmt + character :: upd='F' + integer :: i,j,nza,nzb,nzt,ictxt,me,np,err_act + logical, parameter :: debug=.false. + character(len=20) :: name, ch_err + + interface psb_asmatbld + Subroutine psb_dasmatbld(ptype,novr,a,blk,desc_data,upd,desc_p,info,outfmt) + use psb_base_mod + use psb_prec_type + integer, intent(in) :: ptype,novr + Type(psb_dspmat_type), Intent(in) :: a + Type(psb_dspmat_type), Intent(inout) :: blk + Type(psb_desc_type), Intent(inout) :: desc_p + Type(psb_desc_type), Intent(in) :: desc_data + Character, Intent(in) :: upd + integer, intent(out) :: info + character(len=5), optional :: outfmt + end Subroutine psb_dasmatbld + end interface + + if(psb_get_errstatus().ne.0) return + info=0 + name='psb_slu_bld' + call psb_erractionsave(err_act) + + ictxt = desc_a%matrix_data(psb_ctxt_) + + call psb_info(ictxt, me, np) + + fmt = 'COO' + call psb_nullify_sp(blck) + call psb_nullify_sp(atmp) + + atmp%fida='COO' + if (Debug) then + write(0,*) me, 'SPLUBLD: Calling csdp' + call psb_barrier(ictxt) + endif + + call psb_csdp(a,atmp,info) + if(info /= 0) then + info=4010 + ch_err='psb_csdp' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + nza = atmp%infoa(psb_nnz_) + if (Debug) then + write(0,*) me, 'SPLUBLD: Done csdp',info,nza,atmp%m,atmp%k + call psb_barrier(ictxt) + endif + call psb_asmatbld(p%iprcparm(p_type_),p%iprcparm(n_ovr_),a,& + & blck,desc_a,upd,p%desc_data,info,outfmt=fmt) + if(info /= 0) then + info=4010 + ch_err='psb_asmatbld' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + nzb = blck%infoa(psb_nnz_) + if (Debug) then + write(0,*) me, 'SPLUBLD: Done asmatbld',info,nzb,blck%fida + call psb_barrier(ictxt) + endif + if (nzb > 0 ) then + if (size(atmp%aspk) size(rtmp)) then + call psb_realloc(nzl,rtmp,info) + if(info/=0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + endif + jj = a%ia2(ir) + k=0 + do kk=1, nzl + if (a%ia1(jj+kk-1)<=atmp%m) then + k = k + 1 + rtmp(k) = a%aspk(jj+kk-1) + atmp%ia1(j+k-1) = p%invperm(a%ia1(jj+kk-1)) + endif + enddo + call isrx(k,atmp%ia1(j:j+k-1),itmp2) + do kk=1,k + atmp%aspk(j+kk-1) = rtmp(itmp2(kk)) + enddo + + else if (ir <= atmp%m ) then + + ir = ir - a%m + nzl = blck%ia2(ir+1) - blck%ia2(ir) + if (nzl > size(rtmp)) then + call psb_realloc(nzl,rtmp,info) + if(info/=0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + endif + jj = blck%ia2(ir) + k=0 + do kk=1, nzl + if (blck%ia1(jj+kk-1)<=atmp%m) then + k = k + 1 + rtmp(k) = blck%aspk(jj+kk-1) + atmp%ia1(j+k-1) = p%invperm(blck%ia1(jj+kk-1)) + endif + enddo + call isrx(k,atmp%ia1(j:j+k-1),itmp2) + do kk=1,k + atmp%aspk(j+kk-1) = rtmp(itmp2(kk)) + enddo + + else + write(0,*) 'Row index error 1 :',i,ir + endif + + j = j + k + atmp%ia2(i+1) = j + + enddo + + t4 = mpi_wtime() + + + deallocate(itmp,itmp2,rtmp) + + else if (p%iprcparm(iren_)==renum_gps_) then + + atmp%m = a%m + blck%m + atmp%k = a%k + atmp%fida='CSR' + atmp%descra = 'GUN' + do i=1, a%m + atmp%ia2(i) = a%ia2(i) + do j= a%ia2(i), a%ia2(i+1)-1 + atmp%ia1(j) = a%ia1(j) + enddo + enddo + atmp%ia2(a%m+1) = a%ia2(a%m+1) + nztota = atmp%ia2(a%m+1) -1 + if (blck%m>0) then + do i=1, blck%m + atmp%ia2(a%m+i) = nztota+blck%ia2(i) + do j= blck%ia2(i), blck%ia2(i+1)-1 + atmp%ia1(nztota+j) = blck%ia1(j) + enddo + enddo + atmp%ia2(atmp%m+1) = nztota+blck%ia2(blck%m+1) + endif + nztmp = atmp%ia2(atmp%m+1) - 1 + + + ! This is a renumbering with Gibbs-Poole-Stockmeyer + ! band reduction. Switched off for now. To be fixed, + ! gps_reduction should get p%perm. + + ! write(0,*) me,' Renumbering: realloc perms',atmp%m + call psb_realloc(atmp%m,p%perm,info) + if(info/=0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + call psb_realloc(atmp%m,p%invperm,info) + if(info/=0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocate(itmp(max(8,atmp%m+2,nztmp+2)),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + itmp(1:8) = 0 + ! write(0,*) me,' Renumbering: Calling Metis' + + ! write(0,*) size(p%av(u_pr_)%pl),size(p%av(l_pr_)%pr) + call gps_reduction(atmp%m,atmp%ia2,atmp%ia1,p%perm,p%invperm,info) + if(info/=0) then + info=4010 + ch_err='gps_reduction' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + ! write(0,*) me,' Renumbering: Done GPS' + ! call psb_barrier(ictxt) + do i=1, atmp%m + if (p%perm(i) /= i) then + write(0,*) me,' permutation is not identity ' + exit + endif + enddo + + + do k=1, nnr + p%invperm(p%perm(k)) = k + enddo + t3 = mpi_wtime() + + ! Build ATMP with new numbering. + + allocate(itmp2(max(8,atmp%m+2,nztmp+2)),rtmp(atmp%m),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + j = 1 + atmp%ia2(1) = 1 + do i=1, atmp%m + ir = p%perm(i) + + if (ir <= a%m ) then + + nzl = a%ia2(ir+1) - a%ia2(ir) + if (nzl > size(rtmp)) then + call psb_realloc(nzl,rtmp,info) + if(info/=0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + endif + jj = a%ia2(ir) + k=0 + do kk=1, nzl + if (a%ia1(jj+kk-1)<=atmp%m) then + k = k + 1 + rtmp(k) = a%aspk(jj+kk-1) + atmp%ia1(j+k-1) = p%invperm(a%ia1(jj+kk-1)) + endif + enddo + call isrx(k,atmp%ia1(j:j+k-1),itmp2) + do kk=1,k + atmp%aspk(j+kk-1) = rtmp(itmp2(kk)) + enddo + + else if (ir <= atmp%m ) then + + ir = ir - a%m + nzl = blck%ia2(ir+1) - blck%ia2(ir) + if (nzl > size(rtmp)) then + call psb_realloc(nzl,rtmp,info) + if(info/=0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + endif + jj = blck%ia2(ir) + k=0 + do kk=1, nzl + if (blck%ia1(jj+kk-1)<=atmp%m) then + k = k + 1 + rtmp(k) = blck%aspk(jj+kk-1) + atmp%ia1(j+k-1) = p%invperm(blck%ia1(jj+kk-1)) + endif + enddo + call isrx(k,atmp%ia1(j:j+k-1),itmp2) + do kk=1,k + atmp%aspk(j+kk-1) = rtmp(itmp2(kk)) + enddo + + else + write(0,*) 'Row index error 1 :',i,ir + endif + + j = j + k + atmp%ia2(i+1) = j + + enddo + + t4 = mpi_wtime() + + + + deallocate(itmp,itmp2,rtmp) + + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +contains + + + subroutine gps_reduction(m,ia,ja,perm,iperm,info) + integer i,j,dgConn,Npnt,m + integer n,idpth,ideg,ibw2,ipf2 + integer,dimension(:) :: perm,iperm,ia,ja + integer, intent(out) :: info + + integer,dimension(:,:),allocatable::NDstk + integer,dimension(:),allocatable::iOld,renum,ndeg,lvl,lvls1,lvls2,ccstor + + character(len=20) :: name, ch_err + + if(psb_get_errstatus().ne.0) return + info=0 + name='gps_reduction' + call psb_erractionsave(err_act) + + + !--- Calcolo il massimo grado di connettivita'. + npnt = m + write(6,*) ' GPS su ',npnt + dgConn=0 + do i=1,m + dgconn = max(dgconn,(ia(i+1)-ia(i))) + enddo + !--- Il max valore di connettivita' e "dgConn" + + !--- Valori della common + n=Npnt !--- Numero di righe + iDeg=dgConn !--- Massima connettivita' + ! iDpth= !--- Numero di livelli non serve settarlo + + allocate(NDstk(Npnt,dgConn),stat=info) + if (info/=0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + else + write(0,*) 'gps_reduction first alloc OK' + endif + allocate(iOld(Npnt),renum(Npnt+1),ndeg(Npnt),lvl(Npnt),lvls1(Npnt),& + &lvls2(Npnt),ccstor(Npnt),stat=info) + if (info/=0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + else + write(0,*) 'gps_reduction 2nd alloc OK' + endif + + !--- Prepariamo il grafo della matrice + Ndstk(:,:)=0 + do i=1,Npnt + k=0 + do j = ia(i),ia(i+1) - 1 + if ((1<=ja(j)).and.( ja( j ) /= i ).and.(ja(j)<=npnt)) then + k = k+1 + Ndstk(i,k)=ja(j) + endif + enddo + ndeg(i)=k + enddo + + !--- Numerazione. + do i=1,Npnt + iOld(i)=i + enddo + write(0,*) 'gps_red : Preparation done' + !--- + !--- Chiamiamo funzione reduce. + call psb_gps_reduce(Ndstk,Npnt,iOld,renum,ndeg,lvl,lvls1, lvls2,ccstor,& + & ibw2,ipf2,n,idpth,ideg) + write(0,*) 'gps_red : Done reduce' + !--- Permutazione + perm(1:Npnt)=renum(1:Npnt) + !--- Inversa permutazione + do i=1,Npnt + iperm(perm(i))=i + enddo + !--- Puliamo tutto. + deallocate(NDstk,iOld,renum,ndeg,lvl,lvls1,lvls2,ccstor) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + + end subroutine gps_reduction + +end subroutine psb_dsp_renum diff --git a/psb_dumf_bld.f90 b/psb_dumf_bld.f90 new file mode 100644 index 00000000..cb3c9008 --- /dev/null +++ b/psb_dumf_bld.f90 @@ -0,0 +1,212 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_dumf_bld(a,desc_a,p,info) + use psb_base_mod + use psb_prec_type + implicit none + + type(psb_dspmat_type), intent(inout) :: a + type(psb_desc_type), intent(in) :: desc_a + type(psb_dbaseprc_type), intent(inout) :: p + integer, intent(out) :: info + + + type(psb_dspmat_type) :: blck, atmp + character(len=5) :: fmt + character :: upd='F' + integer :: i,j,nza,nzb,nzt,ictxt,me,np,err_act + integer :: i_err(5) + logical, parameter :: debug=.false. + character(len=20) :: name, ch_err + + interface psb_asmatbld + Subroutine psb_dasmatbld(ptype,novr,a,blk,desc_data,upd,desc_p,info,outfmt) + use psb_base_mod + use psb_prec_type + integer, intent(in) :: ptype,novr + Type(psb_dspmat_type), Intent(in) :: a + Type(psb_dspmat_type), Intent(inout) :: blk + Type(psb_desc_type), Intent(inout) :: desc_p + Type(psb_desc_type), Intent(in) :: desc_data + Character, Intent(in) :: upd + integer, intent(out) :: info + character(len=5), optional :: outfmt + end Subroutine psb_dasmatbld + end interface + + info=0 + name='psb_umf_bld' + call psb_erractionsave(err_act) + + ictxt = desc_A%matrix_data(psb_ctxt_) + call psb_info(ictxt, me, np) + + fmt = 'COO' + call psb_nullify_sp(blck) + call psb_nullify_sp(atmp) + + atmp%fida='COO' + if (Debug) then + write(0,*) me, 'UMFBLD: Calling csdp' + call psb_barrier(ictxt) + endif + + call psb_dcsdp(a,atmp,info) + if(info /= 0) then + info=4010 + ch_err='psb_dcsdp' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + nza = psb_sp_get_nnzeros(atmp) + nzb = psb_sp_get_nnzeros(a) + + if (Debug) then + write(0,*) me, 'UMFBLD: Done csdp',info,nza,atmp%m,atmp%k,nzb + call psb_barrier(ictxt) + endif + call psb_asmatbld(p%iprcparm(p_type_),p%iprcparm(n_ovr_),a,& + & blck,desc_a,upd,p%desc_data,info,outfmt=fmt) + if(info /= 0) then + info=4010 + ch_err='psb_asmatbld' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + nzb = psb_sp_get_nnzeros(blck) + if (Debug) then + write(0,*) me, 'UMFBLD: Done asmatbld',info,nzb,blck%fida + call psb_barrier(ictxt) + endif + if (nzb > 0 ) then + if (size(atmp%aspk) null() ! + type(psb_desc_type), pointer :: base_desc=> null() ! + real(kind(1.d0)), allocatable :: dorig(:) + + end type psb_dbaseprc_type + + + ! + ! Multilevel preconditioning + ! + ! To each level I there corresponds a matrix A(I) and a preconditioner K(I) + ! + ! A notational difference: in the DD reference above the preconditioner for + ! a given level K(I) is written out as a sum over the subdomains + ! + ! SUM_k(R_k^T A_k R_k) + ! + ! whereas in this code the sum is implicit in the parallelization, + ! i.e. each process takes care of one subdomain, and for each level we have + ! as many subdomains as there are processes (except for the coarsest level where + ! we might have a replicated index space). Thus the sum apparently disappears + ! from our code, but only apparently, because it is implicit in the call + ! to psb_baseprc_aply. + ! + ! A bit of description of the baseprecv(:) data structure: + ! 1. Number of levels = NLEV = size(baseprecv(:)) + ! 2. baseprecv(ilev)%av(:) sparse matrices needed for the current level. + ! Includes: + ! 2.1.: baseprecv(ilev)%av(l_pr_) L factor of ILU preconditioners + ! 2.2.: baseprecv(ilev)%av(u_pr_) U factor of ILU preconditioners + ! 2.3.: baseprecv(ilev)%av(ap_nd_) Off-diagonal part of A for Jacobi sweeps + ! 2.4.: baseprecv(ilev)%av(ac_) Aggregated matrix of level ILEV + ! 2.5.: baseprecv(ilev)%av(sm_pr_t_) Smoother prolongator transpose; maps vectors + ! (ilev-1) ---> (ilev) + ! 2.6.: baseprecv(ilev)%av(sm_pr_) Smoother prolongator; maps vectors + ! (ilev) ---> (ilev-1) + ! Shouldn't we keep just one of them and handle transpose in the sparse BLAS? maybe + ! + ! 3. baseprecv(ilev)%desc_data comm descriptor for level ILEV + ! 4. baseprecv(ilev)%base_a Pointer (really a pointer!) to the base matrix + ! of the current level, i.e.: if ILEV=1 then A + ! else the aggregated matrix av(ac_); so we have + ! a unified treatment of residuals. Need this to + ! avoid passing explicitly matrix A to the + ! outer prec. routine + ! 5. baseprecv(ilev)%mlia The aggregation map from (ilev-1)-->(ilev) + ! if no smoother, it is used instead of sm_pr_ + ! 6. baseprecv(ilev)%nlaggr Number of aggregates on the various procs. + ! + type psb_dprec_type + type(psb_dbaseprc_type), allocatable :: baseprecv(:) + ! contain type of preconditioning to be performed + integer :: prec, base_prec + end type psb_dprec_type + + type psb_zbaseprc_type + + type(psb_zspmat_type), allocatable :: av(:) + complex(kind(1.d0)), allocatable :: d(:) + type(psb_desc_type) :: desc_data , desc_ac + integer, allocatable :: iprcparm(:) + real(kind(1.d0)), allocatable :: dprcparm(:) + integer, allocatable :: perm(:), invperm(:) + integer, allocatable :: mlia(:), nlaggr(:) + type(psb_zspmat_type), pointer :: base_a => null() ! + type(psb_desc_type), pointer :: base_desc => null() ! + complex(kind(1.d0)), allocatable :: dorig(:) + + end type psb_zbaseprc_type + + type psb_zprec_type + type(psb_zbaseprc_type), allocatable :: baseprecv(:) + ! contain type of preconditioning to be performed + integer :: prec, base_prec + end type psb_zprec_type + + + character(len=15), parameter, private :: & + & smooth_names(1:3)=(/'Pre-smoothing ','Post-smoothing',& + & 'Smooth both '/) + character(len=15), parameter, private :: & + & smooth_kinds(0:2)=(/'No smoother ','Omega smoother',& + & 'Bizr. smoother'/) + character(len=15), parameter, private :: & + & matrix_names(0:1)=(/'Distributed ','Replicated '/) + character(len=18), parameter, private :: & + & aggr_names(0:3)=(/'Local aggregation ','Global aggregation',& + & 'New local aggr. ','New global aggr. '/) + character(len=6), parameter, private :: & + & restrict_names(0:4)=(/'None ',' ',' ',' ','Halo '/) + character(len=12), parameter, private :: & + & prolong_names(0:3)=(/'None ','Sum ','Average ','Square root'/) + character(len=15), parameter, private :: & + & ml_names(0:3)=(/'None ','Additive ','Multiplicative',& + & 'New ML '/) + character(len=15), parameter, private :: & + & fact_names(0:4)=(/'None ','ILU(n) ',& + & 'ILU(eps) ','Sparse SuperLU','UMFPACK Sp. LU'/) + + interface psb_base_precfree + module procedure psb_dbase_precfree, psb_zbase_precfree + end interface + + interface psb_nullify_baseprec + module procedure psb_nullify_dbaseprec, psb_nullify_zbaseprec + end interface + + interface psb_check_def + module procedure psb_icheck_def, psb_dcheck_def + end interface + + interface psb_prec_descr + module procedure psb_out_prec_descr, psb_file_prec_descr, & + & psb_zout_prec_descr, psb_zfile_prec_descr + end interface + + interface psb_prec_short_descr + module procedure psb_prec_short_descr, psb_zprec_short_descr + end interface + +contains + + subroutine psb_out_prec_descr(p) + type(psb_dprec_type), intent(in) :: p + call psb_file_prec_descr(6,p) + end subroutine psb_out_prec_descr + + subroutine psb_zout_prec_descr(p) + type(psb_zprec_type), intent(in) :: p + call psb_zfile_prec_descr(6,p) + end subroutine psb_zout_prec_descr + + subroutine psb_file_prec_descr(iout,p) + integer, intent(in) :: iout + type(psb_dprec_type), intent(in) :: p + integer :: ilev + + write(iout,*) 'Preconditioner description' + if (allocated(p%baseprecv)) then + if (size(p%baseprecv)>=1) then + write(iout,*) 'Base preconditioner' + select case(p%baseprecv(1)%iprcparm(p_type_)) + case(noprec_) + write(iout,*) 'No preconditioning' + case(diagsc_) + write(iout,*) 'Diagonal scaling' + case(bja_) + write(iout,*) 'Block Jacobi with: ',& + & fact_names(p%baseprecv(1)%iprcparm(f_type_)) + case(asm_,ras_,ash_,rash_) + write(iout,*) 'Additive Schwarz with: ',& + & fact_names(p%baseprecv(1)%iprcparm(f_type_)) + write(iout,*) 'Overlap:',& + & p%baseprecv(1)%iprcparm(n_ovr_) + write(iout,*) 'Restriction: ',& + & restrict_names(p%baseprecv(1)%iprcparm(restr_)) + write(iout,*) 'Prolongation: ',& + & prolong_names(p%baseprecv(1)%iprcparm(prol_)) + end select + end if + if (size(p%baseprecv)>=2) then + do ilev = 2, size(p%baseprecv) + if (.not.allocated(p%baseprecv(ilev)%iprcparm)) then + write(iout,*) 'Inconsistent MLPREC part!' + return + endif + + write(iout,*) 'Multilevel: Level No', ilev + write(iout,*) 'Multilevel type: ',& + & ml_names(p%baseprecv(ilev)%iprcparm(ml_type_)) + if (p%baseprecv(ilev)%iprcparm(ml_type_)>no_ml_) then + write(iout,*) 'Multilevel aggregation: ', & + & aggr_names(p%baseprecv(ilev)%iprcparm(aggr_alg_)) + write(iout,*) 'Smoother: ', & + & smooth_kinds(p%baseprecv(ilev)%iprcparm(smth_kind_)) + if (p%baseprecv(ilev)%iprcparm(smth_kind_) /= no_smth_) then + write(iout,*) 'Smoothing omega: ', & + & p%baseprecv(ilev)%dprcparm(smooth_omega_) + write(iout,*) 'Smoothing position: ',& + & smooth_names(p%baseprecv(ilev)%iprcparm(smth_pos_)) + end if + write(iout,*) 'Coarse matrix: ',& + & matrix_names(p%baseprecv(ilev)%iprcparm(coarse_mat_)) + if (allocated(p%baseprecv(ilev)%nlaggr)) then + write(iout,*) 'Aggregation sizes: ', & + & sum( p%baseprecv(ilev)%nlaggr(:)),' : ',p%baseprecv(ilev)%nlaggr(:) + end if + write(iout,*) 'Factorization type: ',& + & fact_names(p%baseprecv(ilev)%iprcparm(f_type_)) + select case(p%baseprecv(ilev)%iprcparm(f_type_)) + case(f_ilu_n_) + write(iout,*) 'Fill level :',p%baseprecv(ilev)%iprcparm(ilu_fill_in_) + case(f_ilu_e_) + write(iout,*) 'Fill threshold :',p%baseprecv(ilev)%dprcparm(fact_eps_) + case(f_slu_,f_umf_) + case default + write(iout,*) 'Should never get here!' + end select + write(iout,*) 'Number of Jacobi sweeps: ', & + & (p%baseprecv(ilev)%iprcparm(jac_sweeps_)) + end if + end do + end if + + else + write(iout,*) 'No Base preconditioner available, something is wrong!' + return + endif + + end subroutine psb_file_prec_descr + + function psb_prec_short_descr(p) + type(psb_dprec_type), intent(in) :: p + character(len=20) :: psb_prec_short_descr + psb_prec_short_descr = ' ' +!!$ write(iout,*) 'Preconditioner description' +!!$ if (associated(p%baseprecv)) then +!!$ if (size(p%baseprecv)>=1) then +!!$ write(iout,*) 'Base preconditioner' +!!$ select case(p%baseprecv(1)%iprcparm(p_type_)) +!!$ case(noprec_) +!!$ write(iout,*) 'No preconditioning' +!!$ case(diagsc_) +!!$ write(iout,*) 'Diagonal scaling' +!!$ case(bja_) +!!$ write(iout,*) 'Block Jacobi with: ',& +!!$ & fact_names(p%baseprecv(1)%iprcparm(f_type_)) +!!$ case(asm_,ras_,ash_,rash_) +!!$ write(iout,*) 'Additive Schwarz with: ',& +!!$ & fact_names(p%baseprecv(1)%iprcparm(f_type_)) +!!$ write(iout,*) 'Overlap:',& +!!$ & p%baseprecv(1)%iprcparm(n_ovr_) +!!$ write(iout,*) 'Restriction: ',& +!!$ & restrict_names(p%baseprecv(1)%iprcparm(restr_)) +!!$ write(iout,*) 'Prolongation: ',& +!!$ & prolong_names(p%baseprecv(1)%iprcparm(prol_)) +!!$ end select +!!$ end if +!!$ if (size(p%baseprecv)>=2) then +!!$ if (.not.associated(p%baseprecv(2)%iprcparm)) then +!!$ write(iout,*) 'Inconsistent MLPREC part!' +!!$ return +!!$ endif +!!$ write(iout,*) 'Multilevel: ',ml_names(p%baseprecv(2)%iprcparm(ml_type_)) +!!$ if (p%baseprecv(2)%iprcparm(ml_type_)>no_ml_) then +!!$ write(iout,*) 'Multilevel aggregation: ', & +!!$ & aggr_names(p%baseprecv(2)%iprcparm(aggr_alg_)) +!!$ write(iout,*) 'Smoother: ', & +!!$ & smooth_kinds(p%baseprecv(2)%iprcparm(smth_kind_)) +!!$ write(iout,*) 'Smoothing omega: ', p%baseprecv(2)%dprcparm(smooth_omega_) +!!$ write(iout,*) 'Smoothing position: ',& +!!$ & smooth_names(p%baseprecv(2)%iprcparm(smth_pos_)) +!!$ write(iout,*) 'Coarse matrix: ',& +!!$ & matrix_names(p%baseprecv(2)%iprcparm(coarse_mat_)) +!!$ write(iout,*) 'Factorization type: ',& +!!$ & fact_names(p%baseprecv(2)%iprcparm(f_type_)) +!!$ select case(p%baseprecv(2)%iprcparm(f_type_)) +!!$ case(f_ilu_n_) +!!$ write(iout,*) 'Fill level :',p%baseprecv(2)%iprcparm(ilu_fill_in_) +!!$ case(f_ilu_e_) +!!$ write(iout,*) 'Fill threshold :',p%baseprecv(2)%dprcparm(fact_eps_) +!!$ case(f_slu_,f_umf_) +!!$ case default +!!$ write(iout,*) 'Should never get here!' +!!$ end select +!!$ write(iout,*) 'Number of Jacobi sweeps: ', & +!!$ & (p%baseprecv(2)%iprcparm(jac_sweeps_)) +!!$ +!!$ end if +!!$ end if +!!$ +!!$ else +!!$ write(iout,*) 'No Base preconditioner available, something is wrong!' +!!$ return +!!$ endif + + end function psb_prec_short_descr + + + subroutine psb_zfile_prec_descr(iout,p) + integer, intent(in) :: iout + type(psb_zprec_type), intent(in) :: p + + write(iout,*) 'Preconditioner description' + if (allocated(p%baseprecv)) then + if (size(p%baseprecv)>=1) then + write(iout,*) 'Base preconditioner' + select case(p%baseprecv(1)%iprcparm(p_type_)) + case(noprec_) + write(iout,*) 'No preconditioning' + case(diagsc_) + write(iout,*) 'Diagonal scaling' + case(bja_) + write(iout,*) 'Block Jacobi with: ',& + & fact_names(p%baseprecv(1)%iprcparm(f_type_)) + case(asm_,ras_,ash_,rash_) + write(iout,*) 'Additive Schwarz with: ',& + & fact_names(p%baseprecv(1)%iprcparm(f_type_)) + write(iout,*) 'Overlap:',& + & p%baseprecv(1)%iprcparm(n_ovr_) + write(iout,*) 'Restriction: ',& + & restrict_names(p%baseprecv(1)%iprcparm(restr_)) + write(iout,*) 'Prolongation: ',& + & prolong_names(p%baseprecv(1)%iprcparm(prol_)) + end select + end if + if (size(p%baseprecv)>=2) then + if (.not.allocated(p%baseprecv(2)%iprcparm)) then + write(iout,*) 'Inconsistent MLPREC part!' + return + endif + write(iout,*) 'Multilevel: ',ml_names(p%baseprecv(2)%iprcparm(ml_type_)) + if (p%baseprecv(2)%iprcparm(ml_type_)>no_ml_) then + write(iout,*) 'Multilevel aggregation: ', & + & aggr_names(p%baseprecv(2)%iprcparm(aggr_alg_)) + write(iout,*) 'Smoother: ', & + & smooth_kinds(p%baseprecv(2)%iprcparm(smth_kind_)) + if (p%baseprecv(2)%iprcparm(smth_kind_) /= no_smth_) then + write(iout,*) 'Smoothing omega: ', p%baseprecv(2)%dprcparm(smooth_omega_) + write(iout,*) 'Smoothing position: ',& + & smooth_names(p%baseprecv(2)%iprcparm(smth_pos_)) + end if + + write(iout,*) 'Coarse matrix: ',& + & matrix_names(p%baseprecv(2)%iprcparm(coarse_mat_)) + if (allocated(p%baseprecv(ilev)%nlaggr)) then + write(iout,*) 'Aggregation sizes: ', & + & sum( p%baseprecv(2)%nlaggr(:)),' : ',p%baseprecv(2)%nlaggr(:) + endif + write(iout,*) 'Factorization type: ',& + & fact_names(p%baseprecv(2)%iprcparm(f_type_)) + select case(p%baseprecv(2)%iprcparm(f_type_)) + case(f_ilu_n_) + write(iout,*) 'Fill level :',p%baseprecv(2)%iprcparm(ilu_fill_in_) + case(f_ilu_e_) + write(iout,*) 'Fill threshold :',p%baseprecv(2)%dprcparm(fact_eps_) + case(f_slu_,f_umf_) + case default + write(iout,*) 'Should never get here!' + end select + write(iout,*) 'Number of Jacobi sweeps: ', & + & (p%baseprecv(2)%iprcparm(jac_sweeps_)) + + end if + end if + + else + write(iout,*) 'No Base preconditioner available, something is wrong!' + return + endif + + end subroutine psb_zfile_prec_descr + + function psb_zprec_short_descr(p) + type(psb_zprec_type), intent(in) :: p + character(len=20) :: psb_zprec_short_descr + psb_zprec_short_descr = ' ' +!!$ write(iout,*) 'Preconditioner description' +!!$ if (associated(p%baseprecv)) then +!!$ if (size(p%baseprecv)>=1) then +!!$ write(iout,*) 'Base preconditioner' +!!$ select case(p%baseprecv(1)%iprcparm(p_type_)) +!!$ case(noprec_) +!!$ write(iout,*) 'No preconditioning' +!!$ case(diagsc_) +!!$ write(iout,*) 'Diagonal scaling' +!!$ case(bja_) +!!$ write(iout,*) 'Block Jacobi with: ',& +!!$ & fact_names(p%baseprecv(1)%iprcparm(f_type_)) +!!$ case(asm_,ras_,ash_,rash_) +!!$ write(iout,*) 'Additive Schwarz with: ',& +!!$ & fact_names(p%baseprecv(1)%iprcparm(f_type_)) +!!$ write(iout,*) 'Overlap:',& +!!$ & p%baseprecv(1)%iprcparm(n_ovr_) +!!$ write(iout,*) 'Restriction: ',& +!!$ & restrict_names(p%baseprecv(1)%iprcparm(restr_)) +!!$ write(iout,*) 'Prolongation: ',& +!!$ & prolong_names(p%baseprecv(1)%iprcparm(prol_)) +!!$ end select +!!$ end if +!!$ if (size(p%baseprecv)>=2) then +!!$ if (.not.associated(p%baseprecv(2)%iprcparm)) then +!!$ write(iout,*) 'Inconsistent MLPREC part!' +!!$ return +!!$ endif +!!$ write(iout,*) 'Multilevel: ',ml_names(p%baseprecv(2)%iprcparm(ml_type_)) +!!$ if (p%baseprecv(2)%iprcparm(ml_type_)>no_ml_) then +!!$ write(iout,*) 'Multilevel aggregation: ', & +!!$ & aggr_names(p%baseprecv(2)%iprcparm(aggr_alg_)) +!!$ write(iout,*) 'Smoother: ', & +!!$ & smooth_kinds(p%baseprecv(2)%iprcparm(smth_kind_)) +!!$ write(iout,*) 'Smoothing omega: ', p%baseprecv(2)%dprcparm(smooth_omega_) +!!$ write(iout,*) 'Smoothing position: ',& +!!$ & smooth_names(p%baseprecv(2)%iprcparm(smth_pos_)) +!!$ write(iout,*) 'Coarse matrix: ',& +!!$ & matrix_names(p%baseprecv(2)%iprcparm(coarse_mat_)) +!!$ write(iout,*) 'Factorization type: ',& +!!$ & fact_names(p%baseprecv(2)%iprcparm(f_type_)) +!!$ select case(p%baseprecv(2)%iprcparm(f_type_)) +!!$ case(f_ilu_n_) +!!$ write(iout,*) 'Fill level :',p%baseprecv(2)%iprcparm(ilu_fill_in_) +!!$ case(f_ilu_e_) +!!$ write(iout,*) 'Fill threshold :',p%baseprecv(2)%dprcparm(fact_eps_) +!!$ case(f_slu_,f_umf_) +!!$ case default +!!$ write(iout,*) 'Should never get here!' +!!$ end select +!!$ write(iout,*) 'Number of Jacobi sweeps: ', & +!!$ & (p%baseprecv(2)%iprcparm(jac_sweeps_)) +!!$ +!!$ end if +!!$ end if +!!$ +!!$ else +!!$ write(iout,*) 'No Base preconditioner available, something is wrong!' +!!$ return +!!$ endif + + end function psb_zprec_short_descr + + + + + function is_legal_base_prec(ip) + integer, intent(in) :: ip + logical :: is_legal_base_prec + + is_legal_base_prec = ((ip>=noprec_).and.(ip<=rash_)) + return + end function is_legal_base_prec + function is_legal_n_ovr(ip) + integer, intent(in) :: ip + logical :: is_legal_n_ovr + + is_legal_n_ovr = (ip >=0) + return + end function is_legal_n_ovr + function is_legal_renum(ip) + integer, intent(in) :: ip + logical :: is_legal_renum + ! For the time being we are disabling renumbering options. + is_legal_renum = (ip ==0) + return + end function is_legal_renum + function is_legal_jac_sweeps(ip) + integer, intent(in) :: ip + logical :: is_legal_jac_sweeps + + is_legal_jac_sweeps = (ip >= 1) + return + end function is_legal_jac_sweeps + function is_legal_prolong(ip) + integer, intent(in) :: ip + logical :: is_legal_prolong + + is_legal_prolong = ((ip>=psb_none_).and.(ip<=psb_square_root_)) + return + end function is_legal_prolong + function is_legal_restrict(ip) + integer, intent(in) :: ip + logical :: is_legal_restrict + is_legal_restrict = ((ip==psb_nohalo_).or.(ip==psb_halo_)) + return + end function is_legal_restrict + function is_legal_ml_type(ip) + integer, intent(in) :: ip + logical :: is_legal_ml_type + + is_legal_ml_type = ((ip>=no_ml_).and.(ip<=max_ml_)) + return + end function is_legal_ml_type + function is_legal_ml_aggr_kind(ip) + integer, intent(in) :: ip + logical :: is_legal_ml_aggr_kind + + is_legal_ml_aggr_kind = ((ip>=loc_aggr_).and.(ip<=max_aggr_)) + return + end function is_legal_ml_aggr_kind + function is_legal_ml_smooth_pos(ip) + integer, intent(in) :: ip + logical :: is_legal_ml_smooth_pos + + is_legal_ml_smooth_pos = ((ip>=pre_smooth_).and.(ip<=max_smooth_)) + return + end function is_legal_ml_smooth_pos + function is_legal_ml_smth_kind(ip) + integer, intent(in) :: ip + logical :: is_legal_ml_smth_kind + + is_legal_ml_smth_kind = ((ip>=no_smth_).and.(ip<=smth_biz_)) + return + end function is_legal_ml_smth_kind + function is_legal_ml_coarse_mat(ip) + integer, intent(in) :: ip + logical :: is_legal_ml_coarse_mat + + is_legal_ml_coarse_mat = ((ip>=mat_distr_).and.(ip<=mat_repl_)) + return + end function is_legal_ml_coarse_mat + function is_legal_ml_fact(ip) + integer, intent(in) :: ip + logical :: is_legal_ml_fact + + is_legal_ml_fact = ((ip>=f_ilu_n_).and.(ip<=f_umf_)) + return + end function is_legal_ml_fact + function is_legal_ml_lev(ip) + integer, intent(in) :: ip + logical :: is_legal_ml_lev + + is_legal_ml_lev = (ip>=0) + return + end function is_legal_ml_lev + function is_legal_omega(ip) + real(kind(1.d0)), intent(in) :: ip + logical :: is_legal_omega + + is_legal_omega = ((ip>=0.0d0).and.(ip<=2.0d0)) + return + end function is_legal_omega + function is_legal_ml_eps(ip) + real(kind(1.d0)), intent(in) :: ip + logical :: is_legal_ml_eps + + is_legal_ml_eps = (ip>=0.0d0) + return + end function is_legal_ml_eps + + + subroutine psb_icheck_def(ip,name,id,is_legal) + integer, intent(inout) :: ip + integer, intent(in) :: id + character(len=*), intent(in) :: name + interface + function is_legal(i) + integer, intent(in) :: i + logical :: is_legal + end function is_legal + end interface + + if (.not.is_legal(ip)) then + write(0,*) 'Illegal value for ',name,' :',ip, '. defaulting to ',id + ip = id + end if + end subroutine psb_icheck_def + + subroutine psb_dcheck_def(ip,name,id,is_legal) + real(kind(1.d0)), intent(inout) :: ip + real(kind(1.d0)), intent(in) :: id + character(len=*), intent(in) :: name + interface + function is_legal(i) + real(kind(1.d0)), intent(in) :: i + logical :: is_legal + end function is_legal + end interface + + if (.not.is_legal(ip)) then + write(0,*) 'Illegal value for ',name,' :',ip, '. defaulting to ',id + ip = id + end if + end subroutine psb_dcheck_def + + subroutine psb_dbase_precfree(p,info) + use psb_base_mod + + type(psb_dbaseprc_type), intent(inout) :: p + integer, intent(out) :: info + integer :: i + + info = 0 + + ! Actually we migh just deallocate the top level array, except + ! for the inner UMFPACK or SLU stuff + + if (allocated(p%d)) then + deallocate(p%d,stat=info) + end if + + if (allocated(p%av)) then + do i=1,size(p%av) + call psb_sp_free(p%av(i),info) + if (info /= 0) then + ! Actually, we don't care here about this. + ! Just let it go. + ! return + end if + enddo + deallocate(p%av,stat=info) + end if + + if (allocated(p%desc_data%matrix_data)) & + & call psb_cdfree(p%desc_data,info) + if (allocated(p%desc_ac%matrix_data)) & + & call psb_cdfree(p%desc_ac,info) + + if (allocated(p%dprcparm)) then + deallocate(p%dprcparm,stat=info) + end if + ! This is a pointer to something else, must not free it here. + nullify(p%base_a) + ! This is a pointer to something else, must not free it here. + nullify(p%base_desc) + + if (allocated(p%dorig)) then + deallocate(p%dorig,stat=info) + endif + + if (allocated(p%mlia)) then + deallocate(p%mlia,stat=info) + endif + + if (allocated(p%nlaggr)) then + deallocate(p%nlaggr,stat=info) + endif + + if (allocated(p%perm)) then + deallocate(p%perm,stat=info) + endif + + if (allocated(p%invperm)) then + deallocate(p%invperm,stat=info) + endif + + if (allocated(p%iprcparm)) then + if (p%iprcparm(f_type_)==f_slu_) then + call psb_dslu_free(p%iprcparm(slu_ptr_),info) + end if + if (p%iprcparm(f_type_)==f_umf_) then + call psb_dumf_free(p%iprcparm(umf_symptr_),& + & p%iprcparm(umf_numptr_),info) + end if + deallocate(p%iprcparm,stat=info) + end if + call psb_nullify_baseprec(p) + end subroutine psb_dbase_precfree + + subroutine psb_nullify_dbaseprec(p) + use psb_base_mod + + type(psb_dbaseprc_type), intent(inout) :: p + + nullify(p%base_a) + nullify(p%base_desc) +!!$ nullify(p%av,p%d,p%iprcparm,p%dprcparm,p%perm,p%invperm,p%mlia,& +!!$ & p%nlaggr,p%base_a,p%base_desc,p%dorig,p%desc_data, p%desc_ac) + + end subroutine psb_nullify_dbaseprec + + subroutine psb_zbase_precfree(p,info) + use psb_base_mod + type(psb_zbaseprc_type), intent(inout) :: p + integer, intent(out) :: info + integer :: i + + info = 0 + + if (allocated(p%d)) then + deallocate(p%d,stat=info) + end if + + if (allocated(p%av)) then + do i=1,size(p%av) + call psb_sp_free(p%av(i),info) + if (info /= 0) then + ! Actually, we don't care here about this. + ! Just let it go. + ! return + end if + enddo + deallocate(p%av,stat=info) + + end if + ! call psb_cdfree(p%desc_data,info) + ! call psb_cdfree(p%desc_ac,info) + + if (allocated(p%dprcparm)) then + deallocate(p%dprcparm,stat=info) + end if + ! This is a pointer to something else, must not free it here. + nullify(p%base_a) + ! This is a pointer to something else, must not free it here. + nullify(p%base_desc) + + if (allocated(p%dorig)) then + deallocate(p%dorig,stat=info) + endif + + if (allocated(p%mlia)) then + deallocate(p%mlia,stat=info) + endif + + if (allocated(p%nlaggr)) then + deallocate(p%nlaggr,stat=info) + endif + + if (allocated(p%perm)) then + deallocate(p%perm,stat=info) + endif + + if (allocated(p%invperm)) then + deallocate(p%invperm,stat=info) + endif + + if (allocated(p%iprcparm)) then + if (p%iprcparm(f_type_)==f_slu_) then + call psb_zslu_free(p%iprcparm(slu_ptr_),info) + end if + if (p%iprcparm(f_type_)==f_umf_) then + call psb_zumf_free(p%iprcparm(umf_symptr_),& + & p%iprcparm(umf_numptr_),info) + end if + deallocate(p%iprcparm,stat=info) + end if + call psb_nullify_baseprec(p) + end subroutine psb_zbase_precfree + + subroutine psb_nullify_zbaseprec(p) + use psb_base_mod + + type(psb_zbaseprc_type), intent(inout) :: p + + + nullify(p%base_a) + nullify(p%base_desc) + + end subroutine psb_nullify_zbaseprec + + + function pr_to_str(iprec) + + integer, intent(in) :: iprec + character(len=10) :: pr_to_str + + select case(iprec) + case(noprec_) + pr_to_str='NOPREC' + case(diagsc_) + pr_to_str='DIAGSC' + case(bja_) + pr_to_str='BJA' + case(asm_) + pr_to_str='ASM' + case(ash_) + pr_to_str='ASM' + case(ras_) + pr_to_str='ASM' + case(rash_) + pr_to_str='ASM' + end select + + end function pr_to_str + +end module psb_prec_type diff --git a/psb_slu_impl.c b/psb_slu_impl.c new file mode 100644 index 00000000..8f30048c --- /dev/null +++ b/psb_slu_impl.c @@ -0,0 +1,373 @@ +/* + * MD2P4 + * Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS + * for + * Parallel Sparse BLAS v2.0 + * + * (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata + * Alfredo Buttari + * Daniela di Serafino Second University of Naples + * Pasqua D'Ambra ICAR-CNR + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the MD2P4 group or the names of its contributors may + * not be used to endorse or promote products derived from this + * software without specific written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ +/* This file is an interface to the SuperLU routines for sparse + factorization. It was obtaned by modifying the + c_fortran_dgssv.c file from the SuperLU source distribution; + original copyright terms reproduced below. + + PSBLAS v 2.0 */ + + +/* ===================== + +Copyright (c) 2003, The Regents of the University of California, through +Lawrence Berkeley National Laboratory (subject to receipt of any required +approvals from U.S. Dept. of Energy) + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +(1) Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +(2) Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. +(3) Neither the name of Lawrence Berkeley National Laboratory, U.S. Dept. of +Energy nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +/* + * -- SuperLU routine (version 3.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * October 15, 2003 + * + */ + +#ifdef Have_SLU_ +#include "dsp_defs.h" + +#define HANDLE_SIZE 8 +/* kind of integer to hold a pointer. Use int. + This might need to be changed on 64-bit systems. */ +#ifdef LargeFptr +typedef long long fptr; /* 32-bit by default */ +#else +typedef int fptr; /* 32-bit by default */ +#endif + +typedef struct { + SuperMatrix *L; + SuperMatrix *U; + int *perm_c; + int *perm_r; +} factors_t; + + +#else + +#include + +#endif + + +#ifdef Add_ +#define psb_dslu_factor_ psb_dslu_factor_ +#define psb_dslu_solve_ psb_dslu_solve_ +#define psb_dslu_free_ psb_dslu_free_ +#endif +#ifdef AddDouble_ +#define psb_dslu_factor_ psb_dslu_factor__ +#define psb_dslu_solve_ psb_dslu_solve__ +#define psb_dslu_free_ psb_dslu_free__ +#endif +#ifdef NoChange +#define psb_dslu_factor_ psb_dslu_factor +#define psb_dslu_solve_ psb_dslu_solve +#define psb_dslu_free_ psb_dslu_free +#endif + + + + +void +psb_dslu_factor_(int *n, int *nnz, + double *values, int *rowptr, int *colind, +#ifdef Have_SLU_ + fptr *f_factors, /* a handle containing the address + pointing to the factored matrices */ +#else + void *f_factors, +#endif + int *info) + +{ +/* + * This routine can be called from Fortran. + * performs LU decomposition. + * + * f_factors (input/output) fptr* + * On output contains the pointer pointing to + * the structure of the factored matrices. + * + */ + +#ifdef Have_SLU_ + SuperMatrix A, AC, B; + SuperMatrix *L, *U; + int *perm_r; /* row permutations from partial pivoting */ + int *perm_c; /* column permutation vector */ + int *etree; /* column elimination tree */ + SCformat *Lstore; + NCformat *Ustore; + int i, panel_size, permc_spec, relax; + trans_t trans; + double drop_tol = 0.0; + mem_usage_t mem_usage; + superlu_options_t options; + SuperLUStat_t stat; + factors_t *LUfactors; + + trans = NOTRANS; + + + /* Set the default input options. */ + set_default_options(&options); + + /* Initialize the statistics variables. */ + StatInit(&stat); + + /* Adjust to 0-based indexing */ + for (i = 0; i < *nnz; ++i) --colind[i]; + for (i = 0; i <= *n; ++i) --rowptr[i]; + + dCreate_CompRow_Matrix(&A, *n, *n, *nnz, values, colind, rowptr, + SLU_NR, SLU_D, SLU_GE); + L = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); + U = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); + if ( !(perm_r = intMalloc(*n)) ) ABORT("Malloc fails for perm_r[]."); + if ( !(perm_c = intMalloc(*n)) ) ABORT("Malloc fails for perm_c[]."); + if ( !(etree = intMalloc(*n)) ) ABORT("Malloc fails for etree[]."); + + /* + * Get column permutation vector perm_c[], according to permc_spec: + * permc_spec = 0: natural ordering + * permc_spec = 1: minimum degree on structure of A'*A + * permc_spec = 2: minimum degree on structure of A'+A + * permc_spec = 3: approximate minimum degree for unsymmetric matrices + */ + options.ColPerm=2; + permc_spec = options.ColPerm; + get_perm_c(permc_spec, &A, perm_c); + + sp_preorder(&options, &A, perm_c, etree, &AC); + + panel_size = sp_ienv(1); + relax = sp_ienv(2); + + dgstrf(&options, &AC, drop_tol, relax, panel_size, + etree, NULL, 0, perm_c, perm_r, L, U, &stat, info); + + if ( *info == 0 ) { + Lstore = (SCformat *) L->Store; + Ustore = (NCformat *) U->Store; + dQuerySpace(L, U, &mem_usage); +#if 0 + printf("No of nonzeros in factor L = %d\n", Lstore->nnz); + printf("No of nonzeros in factor U = %d\n", Ustore->nnz); + printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz); + printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", + mem_usage.for_lu/1e6, mem_usage.total_needed/1e6, + mem_usage.expansions); +#endif + } else { + printf("dgstrf() error returns INFO= %d\n", *info); + if ( *info <= *n ) { /* factorization completes */ + dQuerySpace(L, U, &mem_usage); + printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", + mem_usage.for_lu/1e6, mem_usage.total_needed/1e6, + mem_usage.expansions); + } + } + + /* Restore to 1-based indexing */ + for (i = 0; i < *nnz; ++i) ++colind[i]; + for (i = 0; i <= *n; ++i) ++rowptr[i]; + + /* Save the LU factors in the factors handle */ + LUfactors = (factors_t*) SUPERLU_MALLOC(sizeof(factors_t)); + LUfactors->L = L; + LUfactors->U = U; + LUfactors->perm_c = perm_c; + LUfactors->perm_r = perm_r; + *f_factors = (fptr) LUfactors; + + /* Free un-wanted storage */ + SUPERLU_FREE(etree); + Destroy_SuperMatrix_Store(&A); + Destroy_CompCol_Permuted(&AC); + StatFree(&stat); +#else + fprintf(stderr," SLU Not Configured, fix make.inc and recompile\n"); + *info=-1; +#endif +} + + +void +psb_dslu_solve_(int *itrans, int *n, int *nrhs, + double *b, int *ldb, +#ifdef Have_SLU_ + fptr *f_factors, /* a handle containing the address + pointing to the factored matrices */ +#else + void *f_factors, +#endif + int *info) + +{ +/* + * This routine can be called from Fortran. + * performs triangular solve + * + */ +#ifdef Have_SLU_ + SuperMatrix A, AC, B; + SuperMatrix *L, *U; + int *perm_r; /* row permutations from partial pivoting */ + int *perm_c; /* column permutation vector */ + int *etree; /* column elimination tree */ + SCformat *Lstore; + NCformat *Ustore; + int i, panel_size, permc_spec, relax; + trans_t trans; + double drop_tol = 0.0; + mem_usage_t mem_usage; + superlu_options_t options; + SuperLUStat_t stat; + factors_t *LUfactors; + + if (*itrans == 0) { + trans = NOTRANS; + } else if (*itrans ==1) { + trans = TRANS; + } else if (*itrans ==2) { + trans = CONJ; + } else { + trans = NOTRANS; + } + /* Initialize the statistics variables. */ + StatInit(&stat); + + /* Extract the LU factors in the factors handle */ + LUfactors = (factors_t*) *f_factors; + L = LUfactors->L; + U = LUfactors->U; + perm_c = LUfactors->perm_c; + perm_r = LUfactors->perm_r; + + dCreate_Dense_Matrix(&B, *n, *nrhs, b, *ldb, SLU_DN, SLU_D, SLU_GE); + /* Solve the system A*X=B, overwriting B with X. */ + dgstrs (trans, L, U, perm_c, perm_r, &B, &stat, info); + + Destroy_SuperMatrix_Store(&B); + StatFree(&stat); +#else + fprintf(stderr," SLU Not Configured, fix make.inc and recompile\n"); + *info=-1; +#endif + +} + + +void +psb_dslu_free_( +#ifdef Have_SLU_ + fptr *f_factors, /* a handle containing the address + pointing to the factored matrices */ +#else + void *f_factors, +#endif + int *info) + +{ +/* + * This routine can be called from Fortran. + * + * free all storage in the end + * + */ +#ifdef Have_SLU_ + SuperMatrix A, AC, B; + SuperMatrix *L, *U; + int *perm_r; /* row permutations from partial pivoting */ + int *perm_c; /* column permutation vector */ + int *etree; /* column elimination tree */ + SCformat *Lstore; + NCformat *Ustore; + int i, panel_size, permc_spec, relax; + trans_t trans; + double drop_tol = 0.0; + mem_usage_t mem_usage; + superlu_options_t options; + SuperLUStat_t stat; + factors_t *LUfactors; + + trans = NOTRANS; + /* Free the LU factors in the factors handle */ + LUfactors = (factors_t*) *f_factors; + SUPERLU_FREE (LUfactors->perm_r); + SUPERLU_FREE (LUfactors->perm_c); + Destroy_SuperNode_Matrix(LUfactors->L); + Destroy_CompCol_Matrix(LUfactors->U); + SUPERLU_FREE (LUfactors->L); + SUPERLU_FREE (LUfactors->U); + SUPERLU_FREE (LUfactors); + *info = 0; +#else + fprintf(stderr," SLU Not Configured, fix make.inc and recompile\n"); + *info=-1; +#endif +} + + diff --git a/psb_umf_impl.c b/psb_umf_impl.c new file mode 100644 index 00000000..902ce1a3 --- /dev/null +++ b/psb_umf_impl.c @@ -0,0 +1,233 @@ +/* + * MD2P4 + * Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS + * for + * Parallel Sparse BLAS v2.0 + * + * (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata + * Alfredo Buttari + * Daniela di Serafino Second University of Naples + * Pasqua D'Ambra ICAR-CNR + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the MD2P4 group or the names of its contributors may + * not be used to endorse or promote products derived from this + * software without specific written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ +/* This file is an interface to the UMFPACK routines for sparse + factorization. It was obtained by adapting umfpack_di_demo + under the original copyright terms reproduced below. + + PSBLAS v 2.0 */ + + +/* ===================== +UMFPACK Version 4.4 (Jan. 28, 2005), Copyright (c) 2005 by Timothy A. +Davis. All Rights Reserved. + +UMFPACK License: + + Your use or distribution of UMFPACK or any modified version of + UMFPACK implies that you agree to this License. + + THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + + Permission is hereby granted to use or copy this program, provided + that the Copyright, this License, and the Availability of the original + version is retained on all copies. User documentation of any code that + uses UMFPACK or any modified version of UMFPACK code must cite the + Copyright, this License, the Availability note, and "Used by permission." + Permission to modify the code and to distribute modified code is granted, + provided the Copyright, this License, and the Availability note are + retained, and a notice that the code was modified is included. This + software was developed with support from the National Science Foundation, + and is provided to you free of charge. + +Availability: + + http://www.cise.ufl.edu/research/sparse/umfpack + +*/ + + + +#ifdef Add_ +#define psb_dumf_factor_ psb_dumf_factor_ +#define psb_dumf_solve_ psb_dumf_solve_ +#define psb_dumf_free_ psb_dumf_free_ +#endif +#ifdef AddDouble_ +#define psb_dumf_factor_ psb_dumf_factor__ +#define psb_dumf_solve_ psb_dumf_solve__ +#define psb_dumf_free_ psb_dumf_free__ +#endif +#ifdef NoChange +#define psb_dumf_factor_ psb_dumf_factor +#define psb_dumf_solve_ psb_dumf_solve +#define psb_dumf_free_ psb_dumf_free +#endif + + +#include +#ifdef Have_UMF_ +#include "umfpack.h" +#endif + +#ifdef LargeFptr +typedef long long fptr; /* 64-bit*/ +#else +typedef int fptr; /* 32-bit by default */ +#endif + +void +psb_dumf_factor_(int *n, int *nnz, + double *values, int *rowind, int *colptr, +#ifdef Have_UMF_ + fptr *symptr, + fptr *numptr, + +#else + void *symptr, + void *numptr, +#endif + int *info) + +{ + +#ifdef Have_UMF_ + double Info [UMFPACK_INFO], Control [UMFPACK_CONTROL]; + void *Symbolic, *Numeric ; + int i; + + + umfpack_di_defaults(Control); + + for (i = 0; i <= *n; ++i) --colptr[i]; + for (i = 0; i < *nnz; ++i) --rowind[i]; + *info = umfpack_di_symbolic (*n, *n, colptr, rowind, values, &Symbolic, + Control, Info); + + + if ( *info == UMFPACK_OK ) { + *info = 0; + } else { + printf("umfpack_di_symbolic() error returns INFO= %d\n", *info); + *info = -11; + *numptr = (fptr) NULL; + return; + } + + *symptr = (fptr) Symbolic; + + *info = umfpack_di_numeric (colptr, rowind, values, Symbolic, &Numeric, + Control, Info) ; + + + if ( *info == UMFPACK_OK ) { + *info = 0; + *numptr = (fptr) Numeric; + } else { + printf("umfpack_di_numeric() error returns INFO= %d\n", *info); + *info = -12; + *numptr = (fptr) NULL; + } + + for (i = 0; i <= *n; ++i) ++colptr[i]; + for (i = 0; i < *nnz; ++i) ++rowind[i]; +#else + fprintf(stderr," UMF Not Configured, fix make.inc and recompile\n"); + *info=-1; +#endif +} + + +void +psb_dumf_solve_(int *itrans, int *n, + double *x, double *b, int *ldb, +#ifdef Have_UMF_ + fptr *numptr, + +#else + void *numptr, +#endif + int *info) + +{ +#ifdef Have_UMF_ + double Info [UMFPACK_INFO], Control [UMFPACK_CONTROL]; + void *Symbolic, *Numeric ; + int i,trans; + + + umfpack_di_defaults(Control); + Control[UMFPACK_IRSTEP]=0; + + + if (*itrans == 0) { + trans = UMFPACK_A; + } else if (*itrans ==1) { + trans = UMFPACK_At; + } else { + trans = UMFPACK_A; + } + + *info = umfpack_di_solve(trans,NULL,NULL,NULL, + x,b,(void *) *numptr,Control,Info); + +#else + fprintf(stderr," UMF Not Configured, fix make.inc and recompile\n"); + *info=-1; +#endif + +} + + +void +psb_dumf_free_( +#ifdef Have_UMF_ + fptr *symptr, + fptr *numptr, + +#else + void *symptr, + void *numptr, +#endif + int *info) + +{ +#ifdef Have_UMF_ + void *Symbolic, *Numeric ; + Symbolic = (void *) *symptr; + Numeric = (void *) *numptr; + + umfpack_di_free_numeric(&Numeric); + umfpack_di_free_symbolic(&Symbolic); + *info=0; +#else + fprintf(stderr," UMF Not Configured, fix make.inc and recompile\n"); + *info=-1; +#endif +} + + diff --git a/psb_zasmatbld.f90 b/psb_zasmatbld.f90 new file mode 100644 index 00000000..dbcf1e98 --- /dev/null +++ b/psb_zasmatbld.f90 @@ -0,0 +1,236 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +!***************************************************************************** +!* * +!* This routine does two things: * +!* 1. Builds the auxiliary descriptor. This is always done even for * +!* Block Jacobi. * +!* 2. Retrieves the remote matrix pieces. * +!* * +!* All of 1. is done under psb_cdovr, which is independent of CSR, and * +!* has been placed in the TOOLS directory because it might be used for * +!* building a descriptor for an extended stencil in a PDE solver without * +!* necessarily applying AS precond. * +!* * +!* * +!* * +!* * +!* * +!***************************************************************************** +Subroutine psb_zasmatbld(ptype,novr,a,blk,desc_data,upd,desc_p,info,outfmt) + + use psb_base_mod + use psb_prec_type + Implicit None + + ! .. Array Arguments .. + integer, intent(in) :: ptype,novr + Type(psb_zspmat_type), Intent(in) :: a + Type(psb_zspmat_type), Intent(inout) :: blk + integer, intent(out) :: info + Type(psb_desc_type), Intent(inout) :: desc_p + Type(psb_desc_type), Intent(in) :: desc_data + Character, Intent(in) :: upd + character(len=5), optional :: outfmt + + + real(kind(1.d0)) :: t1,t2,t3,mpi_wtime + external mpi_wtime + integer icomm + + ! .. Local Scalars .. + Integer :: k, np,me,m,nnzero,& + & ictxt, n_col,ier,n,int_err(5),& + & tot_recv, ircode, n_row,nhalo, nrow_a,err_act + Logical,Parameter :: debug=.false., debugprt=.false. + character(len=20) :: name, ch_err + name='psb_zasmatbld' + if(psb_get_errstatus().ne.0) return + info=0 + call psb_erractionsave(err_act) + + If(debug) Write(0,*)'IN DASMATBLD ', upd + ictxt=desc_data%matrix_data(psb_ctxt_) + tot_recv=0 + + nrow_a = desc_data%matrix_data(psb_n_row_) + nnzero = Size(a%aspk) + n_col = desc_data%matrix_data(psb_n_col_) + nhalo = n_col-nrow_a + + + If (ptype == bja_) Then + ! + ! Block Jacobi. Copy the descriptor, just in case we want to + ! do the renumbering. + ! + If(debug) Write(0,*)' asmatbld calling allocate ' + call psb_sp_all(0,0,blk,1,info) + if(info /= 0) then + info=4010 + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + blk%fida = 'COO' + blk%infoa(psb_nnz_) = 0 + If(debug) Write(0,*)' asmatbld done spallocate' + If (upd == 'F') Then + call psb_cdcpy(desc_data,desc_p,info) + If(debug) Write(0,*)' asmatbld done cdcpy' + if(info /= 0) then + info=4010 + ch_err='psb_cdcpy' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + endif + + Else If (ptype == asm_) Then + + + ! + ! Additive Schwarz variant. + ! + ! + + ictxt=desc_data%matrix_data(psb_ctxt_) + + if (novr < 0) then + info=3 + int_err(1)=novr + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + if (novr == 0) then + ! + ! This is really just Block Jacobi..... + ! + If(debug) Write(0,*)' asmatbld calling allocate novr=0' + call psb_sp_all(0,0,blk,1,info) + if(info /= 0) then + info=4010 + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + blk%fida='COO' + blk%infoa(psb_nnz_)=0 + if (debug) write(0,*) 'Calling desccpy' + if (upd == 'F') then + call psb_cdcpy(desc_data,desc_p,info) + If(debug) Write(0,*)' asmatbld done cdcpy' + if(info /= 0) then + info=4010 + ch_err='psb_cdcpy' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (debug) write(0,*) 'Early return from asmatbld: P>=3 N_OVR=0' + endif + return + endif + + call psb_get_mpicomm(ictxt,icomm) + + Call psb_info(ictxt, me, np) + If(debug)Write(0,*)'BEGIN dasmatbld',me,upd,novr + t1 = mpi_wtime() + + If (upd == 'F') Then + ! + ! Build the auiliary descriptor',desc_p%matrix_data(psb_n_row_) + ! + call psb_cdbldovr(a,desc_data,novr,desc_p,info) + if(info /= 0) then + info=4010 + ch_err='psb_cdbldovr' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + Endif + + if(debug) write(0,*) me,' From cdovr _:',desc_p%matrix_data(psb_n_row_),desc_p%matrix_data(psb_n_col_) + + + n_row = desc_p%matrix_data(psb_n_row_) + t2 = mpi_wtime() + + if (debug) write(0,*) 'Before sphalo ',blk%fida,blk%m,psb_nnz_,blk%infoa(psb_nnz_) + + if (present(outfmt)) then + if(debug) write(0,*) me,': Calling SPHALO with ',size(blk%ia2) + Call psb_sphalo(a,desc_p,blk,info,outfmt=outfmt) + else + if(debug) write(0,*) me,': Calling SPHALO with ',size(blk%ia2) + Call psb_sphalo(a,desc_p,blk,info) + end if + + + if(info /= 0) then + info=4010 + ch_err='psb_sphalo' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (debug) write(0,*) 'After psb_sphalo ',blk%fida,blk%m,psb_nnz_,blk%infoa(psb_nnz_) + + t3 = mpi_wtime() + if (debugprt) then + open(40+me) + call psb_csprt(40+me,blk,head='% Ovrlap rows') + close(40+me) + endif + + + End If + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + Return + +End Subroutine psb_zasmatbld + diff --git a/psb_zbaseprc_aply.f90 b/psb_zbaseprc_aply.f90 new file mode 100644 index 00000000..951aa47e --- /dev/null +++ b/psb_zbaseprc_aply.f90 @@ -0,0 +1,280 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_zbaseprc_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) + ! + ! Compute Y <- beta*Y + alpha*K^-1 X + ! where K is a a basic preconditioner stored in prec + ! + use psb_base_mod + use psb_prec_type + implicit none + + type(psb_desc_type),intent(in) :: desc_data + type(psb_zbaseprc_type), intent(in) :: prec + complex(kind(0.d0)),intent(inout) :: x(:), y(:) + complex(kind(0.d0)),intent(in) :: alpha,beta + character(len=1) :: trans + complex(kind(0.d0)),target :: work(:) + integer, intent(out) :: info + + ! Local variables + integer :: n_row,n_col, int_err(5) + complex(kind(1.d0)), pointer :: ww(:), aux(:), tx(:),ty(:) + character ::diagl, diagu + integer :: ictxt,np,me,i, isz, nrg, err_act + real(kind(1.d0)) :: t1, t2, t3, t4, t5, t6, t7, mpi_wtime + logical,parameter :: debug=.false., debugprt=.false. + external mpi_wtime + character(len=20) :: name, ch_err + + interface psb_bjac_aply + subroutine psb_zbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) + use psb_base_mod + use psb_prec_type + type(psb_desc_type), intent(in) :: desc_data + type(psb_zbaseprc_type), intent(in) :: prec + complex(kind(0.d0)),intent(inout) :: x(:), y(:) + complex(kind(0.d0)),intent(in) :: alpha,beta + character(len=1) :: trans + complex(kind(0.d0)),target :: work(:) + integer, intent(out) :: info + end subroutine psb_zbjac_aply + end interface + + name='psb_zbaseprc_aply' + info = 0 + call psb_erractionsave(err_act) + + ictxt=desc_data%matrix_data(psb_ctxt_) + call psb_info(ictxt, me, np) + + diagl='U' + diagu='U' + + select case(trans) + case('N','n') + case('T','t','C','c') + case default + info=40 + int_err(1)=6 + ch_err(2:2)=trans + goto 9999 + end select + + select case(prec%iprcparm(p_type_)) + + case(noprec_) + + call psb_geaxpby(alpha,x,beta,y,desc_data,info) + + case(diagsc_) + + if (size(work) >= size(x)) then + ww => work + else + allocate(ww(size(x)),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + end if + + n_row=desc_data%matrix_data(psb_n_row_) + ww(1:n_row) = x(1:n_row)*prec%d(1:n_row) + call psb_geaxpby(alpha,ww,beta,y,desc_data,info) + + if (size(work) < size(x)) then + deallocate(ww,stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Deallocate') + goto 9999 + end if + end if + + case(bja_) + + call psb_bjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) + if(info.ne.0) then + info=4010 + ch_err='psb_bjac_aply' + goto 9999 + end if + + case(asm_,ras_,ash_,rash_) + + if (prec%iprcparm(n_ovr_)==0) then + ! shortcut: this fixes performance for RAS(0) == BJA + call psb_bjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) + if(info.ne.0) then + info=4010 + ch_err='psb_bjacaply' + goto 9999 + end if + + else + ! Note: currently trans is unused. + n_row=prec%desc_data%matrix_data(psb_n_row_) + n_col=prec%desc_data%matrix_data(psb_n_col_) + + isz=max(n_row,N_COL) + if ((6*isz) <= size(work)) then + ww => work(1:isz) + tx => work(isz+1:2*isz) + ty => work(2*isz+1:3*isz) + aux => work(3*isz+1:) + else if ((4*isz) <= size(work)) then + aux => work(1:) + allocate(ww(isz),tx(isz),ty(isz),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + else if ((3*isz) <= size(work)) then + ww => work(1:isz) + tx => work(isz+1:2*isz) + ty => work(2*isz+1:3*isz) + allocate(aux(4*isz),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + else + allocate(ww(isz),tx(isz),ty(isz),& + &aux(4*isz),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + endif + + if (debugprt) write(0,*)' vdiag: ',prec%d(:) + if (debug) write(0,*) 'Bi-CGSTAB with Additive Schwarz prec' + + tx(1:desc_data%matrix_data(psb_n_row_)) = x(1:desc_data%matrix_data(psb_n_row_)) + tx(desc_data%matrix_data(psb_n_row_)+1:isz) = zzero + + if (prec%iprcparm(restr_)==psb_halo_) then + call psb_halo(tx,prec%desc_data,info,work=aux) + if(info /=0) then + info=4010 + ch_err='psb_halo' + goto 9999 + end if + else if (prec%iprcparm(restr_) /= psb_none_) then + write(0,*) 'Problem in PRC_APLY: Unknown value for restriction ',& + &prec%iprcparm(restr_) + end if + + if (prec%iprcparm(iren_)>0) then + call zgelp('N',n_row,1,prec%perm,tx,isz,ww,isz,info) + if(info /=0) then + info=4010 + ch_err='psb_zgelp' + goto 9999 + end if + endif + + call psb_bjac_aply(zone,prec,tx,zzero,ty,prec%desc_data,trans,aux,info) + if(info.ne.0) then + info=4010 + ch_err='psb_bjac_aply' + goto 9999 + end if + + if (prec%iprcparm(iren_)>0) then + call zgelp('N',n_row,1,prec%invperm,ty,isz,ww,isz,info) + if(info /=0) then + info=4010 + ch_err='psb_zgelp' + goto 9999 + end if + endif + + select case (prec%iprcparm(prol_)) + + case(psb_none_) + ! Would work anyway, but since it's supposed to do nothing... + ! call f90_psovrl(ty,prec%desc_data,update=prec%a_restrict) + + case(psb_sum_,psb_avg_) + call psb_ovrl(ty,prec%desc_data,info,& + & update=prec%iprcparm(prol_),work=aux) + if(info /=0) then + info=4010 + ch_err='psb_ovrl' + goto 9999 + end if + + case default + write(0,*) 'Problem in PRC_APLY: Unknown value for prolongation ',& + & prec%iprcparm(prol_) + end select + + call psb_geaxpby(alpha,ty,beta,y,desc_data,info) + + + if ((6*isz) <= size(work)) then + else if ((4*isz) <= size(work)) then + deallocate(ww,tx,ty) + else if ((3*isz) <= size(work)) then + deallocate(aux) + else + deallocate(ww,aux,tx,ty) + endif + end if + case default + write(0,*) 'Invalid PRE%PREC ',prec%iprcparm(p_type_),':',& + & min_prec_,noprec_,diagsc_,bja_,& + & asm_,ras_,ash_,rash_ + end select + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_errpush(info,name,i_err=int_err,a_err=ch_err) + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_zbaseprc_aply + diff --git a/psb_zbaseprc_bld.f90 b/psb_zbaseprc_bld.f90 new file mode 100644 index 00000000..2c493f61 --- /dev/null +++ b/psb_zbaseprc_bld.f90 @@ -0,0 +1,262 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_zbaseprc_bld(a,desc_a,p,info,upd) + + use psb_base_mod + use psb_prec_type + Implicit None + + type(psb_zspmat_type), target :: a + type(psb_desc_type), intent(in), target :: desc_a + type(psb_zbaseprc_type),intent(inout) :: p + integer, intent(out) :: info + character, intent(in), optional :: upd + + interface psb_diagsc_bld + subroutine psb_zdiagsc_bld(a,desc_data,p,upd,info) + use psb_base_mod + use psb_prec_type + integer, intent(out) :: info + type(psb_zspmat_type), intent(in), target :: a + type(psb_desc_type),intent(in) :: desc_data + type(psb_zbaseprc_type), intent(inout) :: p + character, intent(in) :: upd + end subroutine psb_zdiagsc_bld + end interface + + interface psb_ilu_bld + subroutine psb_zilu_bld(a,desc_data,p,upd,info) + use psb_base_mod + use psb_prec_type + integer, intent(out) :: info + type(psb_zspmat_type), intent(in), target :: a + type(psb_desc_type),intent(in) :: desc_data + type(psb_zbaseprc_type), intent(inout) :: p + character, intent(in) :: upd + end subroutine psb_zilu_bld + end interface + + interface psb_slu_bld + subroutine psb_zslu_bld(a,desc_a,p,info) + use psb_base_mod + use psb_prec_type + type(psb_zspmat_type), intent(inout) :: a + type(psb_desc_type), intent(in) :: desc_a + type(psb_zbaseprc_type), intent(inout) :: p + integer, intent(out) :: info + end subroutine psb_zslu_bld + end interface + + interface psb_umf_bld + subroutine psb_zumf_bld(a,desc_a,p,info) + use psb_base_mod + use psb_prec_type + type(psb_zspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + type(psb_zbaseprc_type), intent(inout) :: p + integer, intent(out) :: info + end subroutine psb_zumf_bld + end interface + + ! Local scalars + Integer :: err, nnzero, n_row, n_col,I,j,k,ictxt,& + & me,mycol,np,npcol,mglob,lw, mtype, nrg, nzg, err_act + real(kind(1.d0)) :: temp, real_err(5) + real(kind(1.d0)),pointer :: gd(:), work(:) + integer :: int_err(5) + character :: iupd + + logical, parameter :: debug=.false. + integer,parameter :: iroot=0,iout=60,ilout=40 + character(len=20) :: name, ch_err + + if(psb_get_errstatus().ne.0) return + info=0 + err=0 + call psb_erractionsave(err_act) + name = 'psb_baseprc_bld' + + if (debug) write(0,*) 'Entering baseprc_bld' + info = 0 + int_err(1) = 0 + ictxt = psb_cd_get_context(desc_a) + n_row = psb_cd_get_local_rows(desc_a) + n_col = psb_cd_get_local_cols(desc_a) + mglob = psb_cd_get_global_rows(desc_a) + if (debug) write(0,*) 'Preconditioner Blacs_gridinfo' + call psb_info(ictxt, me, np) + + if (present(upd)) then + if (debug) write(0,*) 'UPD ', upd + if ((UPD.eq.'F').or.(UPD.eq.'T')) then + IUPD=UPD + else + IUPD='F' + endif + else + IUPD='F' + endif + + ! + ! Should add check to ensure all procs have the same... + ! + ! ALso should define symbolic names for the preconditioners. + ! + + call psb_check_def(p%iprcparm(p_type_),'base_prec',& + & diagsc_,is_legal_base_prec) + +!!$ allocate(p%desc_data,stat=info) +!!$ if (info /= 0) then +!!$ call psb_errpush(4010,name,a_err='Allocate') +!!$ goto 9999 +!!$ end if +!!$ +!!$ call psb_nullify_desc(p%desc_data) + + select case(p%iprcparm(p_type_)) + case (noprec_) + ! Do nothing. + call psb_cdcpy(desc_a,p%desc_data,info) + if(info /= 0) then + info=4010 + ch_err='psb_cdcpy' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case (diagsc_) + + call psb_diagsc_bld(a,desc_a,p,iupd,info) + if(debug) write(0,*)me,': out of psb_diagsc_bld' + if(info /= 0) then + info=4010 + ch_err='psb_diagsc_bld' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case (bja_,asm_) + + call psb_check_def(p%iprcparm(n_ovr_),'overlap',& + & 0,is_legal_n_ovr) + call psb_check_def(p%iprcparm(restr_),'restriction',& + & psb_halo_,is_legal_restrict) + call psb_check_def(p%iprcparm(prol_),'prolongator',& + & psb_none_,is_legal_prolong) + call psb_check_def(p%iprcparm(iren_),'renumbering',& + & renum_none_,is_legal_renum) + call psb_check_def(p%iprcparm(f_type_),'fact',& + & f_ilu_n_,is_legal_ml_fact) + + if (debug) write(0,*)me, ': Calling PSB_ILU_BLD' + if (debug) call psb_barrier(ictxt) + + select case(p%iprcparm(f_type_)) + + case(f_ilu_n_,f_ilu_e_) + call psb_ilu_bld(a,desc_a,p,iupd,info) + if(debug) write(0,*)me,': out of psb_ilu_bld' + if (debug) call psb_barrier(ictxt) + if(info /= 0) then + info=4010 + ch_err='psb_ilu_bld' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(f_slu_) + + if(debug) write(0,*)me,': calling slu_bld' + call psb_slu_bld(a,desc_a,p,info) + if(info /= 0) then + info=4010 + ch_err='slu_bld' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(f_umf_) + if(debug) write(0,*)me,': calling umf_bld' + call psb_umf_bld(a,desc_a,p,info) + if(debug) write(0,*)me,': Done umf_bld ',info + if(info /= 0) then + info=4010 + ch_err='umf_bld' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(f_none_) + write(0,*) 'Fact=None in BASEPRC_BLD Bja/ASM??' + info=4010 + ch_err='Inconsistent prec f_none_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + + case default + write(0,*) 'Unknown factor type in baseprc_bld bja/asm: ',& + &p%iprcparm(f_type_) + info=4010 + ch_err='Unknown f_type_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end select + case default + info=4010 + ch_err='Unknown p_type_' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + + end select + + p%base_a => a + p%base_desc => desc_a + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_zbaseprc_bld + diff --git a/psb_zbjac_aply.f90 b/psb_zbjac_aply.f90 new file mode 100644 index 00000000..9ef190e7 --- /dev/null +++ b/psb_zbjac_aply.f90 @@ -0,0 +1,270 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_zbjac_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) + ! + ! Compute Y <- beta*Y + alpha*K^-1 X + ! where K is a a Block Jacobi preconditioner stored in prec + ! Note that desc_data may or may not be the same as prec%desc_data, + ! but since both are INTENT(IN) this should be legal. + ! + + use psb_base_mod + use psb_prec_type + implicit none + + type(psb_desc_type), intent(in) :: desc_data + type(psb_zbaseprc_type), intent(in) :: prec + complex(kind(0.d0)),intent(inout) :: x(:), y(:) + complex(kind(0.d0)),intent(in) :: alpha,beta + character(len=1) :: trans + complex(kind(0.d0)),target :: work(:) + integer, intent(out) :: info + + ! Local variables + integer :: n_row,n_col + complex(kind(1.d0)), pointer :: ww(:), aux(:), tx(:),ty(:),tb(:) + character ::diagl, diagu + integer :: ictxt,np,me,i, isz, nrg, err_act, int_err(5) + real(kind(1.d0)) :: t1, t2, t3, t4, t5, t6, t7, mpi_wtime + logical,parameter :: debug=.false., debugprt=.false. + external mpi_wtime + character(len=20) :: name, ch_err + + name='psb_bjac_aply' + info = 0 + call psb_erractionsave(err_act) + + ictxt=desc_data%matrix_data(psb_ctxt_) + call psb_info(ictxt, me, np) + + diagl='U' + diagu='U' + + select case(trans) + case('N','n') + case('T','t','C','c') + case default + call psb_errpush(40,name) + goto 9999 + end select + + + n_row=desc_data%matrix_data(psb_n_row_) + n_col=desc_data%matrix_data(psb_n_col_) + + if (n_col <= size(work)) then + ww => work(1:n_col) + if ((4*n_col+n_col) <= size(work)) then + aux => work(n_col+1:) + else + allocate(aux(4*n_col),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + endif + else + allocate(ww(n_col),aux(4*n_col),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + endif + + + if (prec%iprcparm(jac_sweeps_) == 1) then + + + select case(prec%iprcparm(f_type_)) + case(f_ilu_n_,f_ilu_e_) + + select case(trans) + case('N','n') + + call psb_spsm(zone,prec%av(l_pr_),x,zzero,ww,desc_data,info,& + & trans='N',unit=diagl,choice=psb_none_,work=aux) + if(info /=0) goto 9999 + ww(1:n_row) = ww(1:n_row)*prec%d(1:n_row) + call psb_spsm(alpha,prec%av(u_pr_),ww,beta,y,desc_data,info,& + & trans='N',unit=diagu,choice=psb_none_, work=aux) + if(info /=0) goto 9999 + + case('T','t','C','c') + call psb_spsm(zone,prec%av(u_pr_),x,zzero,ww,desc_data,info,& + & trans=trans,unit=diagu,choice=psb_none_, work=aux) + if(info /=0) goto 9999 + ww(1:n_row) = ww(1:n_row)*prec%d(1:n_row) + call psb_spsm(alpha,prec%av(l_pr_),ww,beta,y,desc_data,info,& + & trans=trans,unit=diagl,choice=psb_none_,work=aux) + if(info /=0) goto 9999 + + end select + + case(f_slu_) + + ww(1:n_row) = x(1:n_row) + + select case(trans) + case('N','n') + call psb_zslu_solve(0,n_row,1,ww,n_row,prec%iprcparm(slu_ptr_),info) + case('T','t','C','c') + call psb_zslu_solve(1,n_row,1,ww,n_row,prec%iprcparm(slu_ptr_),info) + end select + + if(info /=0) goto 9999 + call psb_geaxpby(alpha,ww,beta,y,desc_data,info) + + case (f_umf_) + + + select case(trans) + case('N','n') + call psb_zumf_solve(0,n_row,ww,x,n_row,prec%iprcparm(umf_numptr_),info) + case('T','t','C','c') + call psb_zumf_solve(1,n_row,ww,x,n_row,prec%iprcparm(umf_numptr_),info) + end select + + if(info /=0) goto 9999 + + call psb_geaxpby(alpha,ww,beta,y,desc_data,info) + + case default + write(0,*) 'Unknown factorization type in bjac_aply',prec%iprcparm(f_type_) + end select + if (debugprt) write(0,*)' Y: ',y(:) + + else if (prec%iprcparm(jac_sweeps_) > 1) then + + ! Note: we have to add TRANS to this one !!!!!!!!! + + if (size(prec%av) < ap_nd_) then + info = 4011 + goto 9999 + endif + + allocate(tx(n_col),ty(n_col),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + tx = zzero + ty = zzero + select case(prec%iprcparm(f_type_)) + case(f_ilu_n_,f_ilu_e_) + do i=1, prec%iprcparm(jac_sweeps_) + ! X(k+1) = M^-1*(b-N*X(k)) + ty(1:n_row) = x(1:n_row) + call psb_spmm(-zone,prec%av(ap_nd_),tx,zone,ty,& + & prec%desc_data,info,work=aux) + if(info /=0) goto 9999 + call psb_spsm(zone,prec%av(l_pr_),ty,zzero,ww,& + & prec%desc_data,info,& + & trans='N',unit='U',choice=psb_none_,work=aux) + if(info /=0) goto 9999 + ww(1:n_row) = ww(1:n_row)*prec%d(1:n_row) + call psb_spsm(zone,prec%av(u_pr_),ww,zzero,tx,& + & prec%desc_data,info,& + & trans='N',unit='U',choice=psb_none_,work=aux) + if(info /=0) goto 9999 + end do + + case(f_slu_) + do i=1, prec%iprcparm(jac_sweeps_) + ! X(k+1) = M^-1*(b-N*X(k)) + ty(1:n_row) = x(1:n_row) + call psb_spmm(-zone,prec%av(ap_nd_),tx,zone,ty,& + & prec%desc_data,info,work=aux) + if(info /=0) goto 9999 + + call psb_zslu_solve(0,n_row,1,ty,n_row,prec%iprcparm(slu_ptr_),info) + if(info /=0) goto 9999 + tx(1:n_row) = ty(1:n_row) + end do + case(f_umf_) + do i=1, prec%iprcparm(jac_sweeps_) + ! X(k+1) = M^-1*(b-N*X(k)) + ty(1:n_row) = x(1:n_row) + call psb_spmm(-zone,prec%av(ap_nd_),tx,zone,ty,& + & prec%desc_data,info,work=aux) + if(info /=0) goto 9999 + + call psb_zumf_solve(0,n_row,ww,ty,n_row,& + & prec%iprcparm(umf_numptr_),info) + if(info /=0) goto 9999 + tx(1:n_row) = ww(1:n_row) + end do + + end select + + call psb_geaxpby(alpha,tx,beta,y,desc_data,info) + + + deallocate(tx,ty) + + + else + + goto 9999 + + endif + + if (n_col <= size(work)) then + if ((4*n_col+n_col) <= size(work)) then + else + deallocate(aux) + endif + else + deallocate(ww,aux) + endif + + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_errpush(info,name,i_err=int_err,a_err=ch_err) + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_zbjac_aply + diff --git a/psb_zbldaggrmat.f90 b/psb_zbldaggrmat.f90 new file mode 100644 index 00000000..65c461fd --- /dev/null +++ b/psb_zbldaggrmat.f90 @@ -0,0 +1,1041 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_zbldaggrmat(a,desc_a,ac,desc_ac,p,info) + use psb_base_mod + use psb_prec_type + implicit none + + type(psb_zspmat_type), intent(in), target :: a + type(psb_zbaseprc_type), intent(inout),target :: p + type(psb_zspmat_type), intent(inout), target :: ac + type(psb_desc_type), intent(in) :: desc_a + type(psb_desc_type), intent(inout) :: desc_ac + integer, intent(out) :: info + + logical, parameter :: aggr_dump=.false. + integer ::ictxt,np,me, err_act + character(len=20) :: name, ch_err + name='psb_zbldaggrmat' + if(psb_get_errstatus().ne.0) return + info=0 + call psb_erractionsave(err_act) + + ictxt=psb_cd_get_context(desc_a) + + call psb_info(ictxt, me, np) + + select case (p%iprcparm(smth_kind_)) + case (no_smth_) + + call raw_aggregate(info) + + if(info /= 0) then + call psb_errpush(4010,name,a_err='raw_aggregate') + goto 9999 + end if + if (aggr_dump) call psb_csprt(90+me,ac,head='% Raw aggregate.') + + case(smth_omg_,smth_biz_) + + call smooth_aggregate(info) + + if(info /= 0) then + call psb_errpush(4010,name,a_err='smooth_aggregate') + goto 9999 + end if + case default + call psb_errpush(4010,name,a_err=name) + goto 9999 + + end select + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +contains + + subroutine raw_aggregate(info) + use psb_base_mod + use psb_prec_type + use mpi + implicit none + + integer, intent(out) :: info + type(psb_zspmat_type) :: b + integer, pointer :: nzbr(:), idisp(:) + integer :: ictxt, nrow, nglob, ncol, ntaggr, nzac, ip, ndx,& + & naggr, np, me, nzt,jl,nzl,nlr,& + & icomm,naggrm1, i, j, k, err_act + + name='raw_aggregate' + if(psb_get_errstatus().ne.0) return + info=0 + call psb_erractionsave(err_act) + + call psb_nullify_sp(b) + + ictxt = psb_cd_get_context(desc_a) + call psb_info(ictxt, me, np) + nglob = psb_cd_get_global_rows(desc_a) + nrow = psb_cd_get_local_rows(desc_a) + ncol = psb_cd_get_local_cols(desc_a) + + naggr = p%nlaggr(me+1) + ntaggr = sum(p%nlaggr) + allocate(nzbr(np), idisp(np),stat=info) + + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + naggrm1=sum(p%nlaggr(1:me)) + + if (p%iprcparm(coarse_mat_) == mat_repl_) then + do i=1, nrow + p%mlia(i) = p%mlia(i) + naggrm1 + end do + call psb_halo(p%mlia,desc_a,info) + end if + + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_halo') + goto 9999 + end if + + nzt = psb_sp_get_nnzeros(a) + + call psb_sp_all(b,nzt,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spall') + goto 9999 + end if + + call psb_sp_setifld(psb_dupl_ovwrt_,psb_dupl_,b,info) + call psb_sp_setifld(psb_upd_dflt_,psb_upd_,b,info) + b%fida = 'COO' + b%m=a%m + b%k=a%k + call psb_csdp(a,b,info) + if(info /= 0) then + info=4010 + ch_err='psb_csdp' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + nzt = psb_sp_get_nnzeros(b) + + j = 0 + do i=1, nzt + if ((1<=b%ia2(i)).and.(b%ia2(i)<=nrow)) then + j = j + 1 + b%aspk(j) = b%aspk(i) + b%ia1(j) = p%mlia(b%ia1(i)) + b%ia2(j) = p%mlia(b%ia2(i)) + end if + enddo + b%infoa(psb_nnz_)=j + call psb_fixcoo(b,info) + + nzt = psb_sp_get_nnzeros(b) + + call psb_sp_reall(b,nzt,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spreall') + goto 9999 + end if + b%m = naggr + b%k = naggr + + if (p%iprcparm(coarse_mat_) == mat_repl_) then + + call psb_cdrep(ntaggr,ictxt,desc_ac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_cdrep') + goto 9999 + end if + + nzbr(:) = 0 + nzbr(me+1) = nzt + call psb_sum(ictxt,nzbr(1:np)) + nzac = sum(nzbr) + call psb_sp_all(ntaggr,ntaggr,ac,nzac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spall') + goto 9999 + end if + + call psb_get_mpicomm(ictxt,icomm) + do ip=1,np + idisp(ip) = sum(nzbr(1:ip-1)) + enddo + ndx = nzbr(me+1) + + call mpi_allgatherv(b%aspk,ndx,mpi_double_complex,ac%aspk,nzbr,idisp,& + & mpi_double_complex,icomm,info) + call mpi_allgatherv(b%ia1,ndx,mpi_integer,ac%ia1,nzbr,idisp,& + & mpi_integer,icomm,info) + call mpi_allgatherv(b%ia2,ndx,mpi_integer,ac%ia2,nzbr,idisp,& + & mpi_integer,icomm,info) + if(info /= 0) then + info=-1 + call psb_errpush(info,name) + goto 9999 + end if + + ac%m = ntaggr + ac%k = ntaggr + ac%infoa(psb_nnz_) = nzac + ac%fida='COO' + ac%descra='G' + call psb_fixcoo(ac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='sp_free') + goto 9999 + end if + + else if (p%iprcparm(coarse_mat_) == mat_distr_) then + + call psb_cdall(ictxt,desc_ac,info,nl=naggr) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_cdall') + goto 9999 + end if + call psb_cdasb(desc_ac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_cdasb') + goto 9999 + end if + + call psb_sp_clone(b,ac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spclone') + goto 9999 + end if + call psb_sp_free(b,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='sp_free') + goto 9999 + end if + + !if(.not.associated(p%av(ap_nd_)%aspk)) p%iprcparm(jac_sweeps_) = 1 + !------------------------------------------------------------------ + ! Split AC=M+N N off-diagonal part + call psb_sp_all(ac%m,ac%k,p%av(ap_nd_),nzl,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_sp_all') + goto 9999 + end if + if(.not.allocated(p%av(ap_nd_)%aspk)) write(0,*) '.not.associated(p%av(ap_nd_)%ia1)' + if(.not.allocated(p%av(ap_nd_)%ia1)) write(0,*) '.not.associated(p%av(ap_nd_)%ia1)' + !write(0,*) 'ok line 238' + + k=0 + do i=1,nzl + if (ac%ia2(i)>ac%m) then + k = k + 1 + p%av(ap_nd_)%aspk(k) = ac%aspk(i) + p%av(ap_nd_)%ia1(k) = ac%ia1(i) + p%av(ap_nd_)%ia2(k) = ac%ia2(i) + endif + enddo + p%av(ap_nd_)%infoa(psb_nnz_) = k + + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_ipcoo2csr') + goto 9999 + end if + call psb_sum(ictxt,k) + + if (k == 0) then + ! If the off diagonal part is emtpy, there's no point + ! in doing multiple Jacobi sweeps. This is certain + ! to happen when running on a single processor. + p%iprcparm(jac_sweeps_) = 1 + end if + !write(0,*) 'operations in bldaggrmat are ok !' + !------------------------------------------------------------------ + + call psb_ipcoo2csr(p%av(ap_nd_),info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='ipcoo2csr') + goto 9999 + end if + + else + + write(0,*) 'Unknown p%iprcparm(coarse_mat) in aggregate_sp',p%iprcparm(coarse_mat_) + end if + + call psb_ipcoo2csr(ac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='ipcoo2csr') + goto 9999 + end if + + deallocate(nzbr,idisp) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + + end subroutine raw_aggregate + + + + subroutine smooth_aggregate(info) + use psb_base_mod + use psb_prec_type + use mpi + implicit none + + integer, intent(out) :: info + + type(psb_zspmat_type) :: b + integer, pointer :: nzbr(:), idisp(:), ivall(:) + integer :: ictxt, nrow, nglob, ncol, ntaggr, nzac, ip, ndx,& + & naggr, np, me, & + & icomm, naggrm1,naggrp1,i,j,err_act,k,nzl + type(psb_zspmat_type), pointer :: am1,am2 + type(psb_zspmat_type) :: am3,am4 + logical :: ml_global_nmb + + logical, parameter :: test_dump=.false., debug=.false. + integer, parameter :: ncmax=16 + real(kind(1.d0)) :: omega, anorm, tmp, dg + character(len=20) :: name + + + name='smooth_aggregate' + if(psb_get_errstatus().ne.0) return + info=0 + call psb_erractionsave(err_act) + + ictxt = psb_cd_get_context(desc_a) + + call psb_info(ictxt, me, np) + + call psb_nullify_sp(b) + call psb_nullify_sp(am3) + call psb_nullify_sp(am4) + + am2 => p%av(sm_pr_t_) + am1 => p%av(sm_pr_) + + nglob = psb_cd_get_global_rows(desc_a) + nrow = psb_cd_get_local_rows(desc_a) + ncol = psb_cd_get_local_cols(desc_a) + + naggr = p%nlaggr(me+1) + ntaggr = sum(p%nlaggr) + + allocate(nzbr(np), idisp(np),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + + naggrm1 = sum(p%nlaggr(1:me)) + naggrp1 = sum(p%nlaggr(1:me+1)) + + ml_global_nmb = ( (p%iprcparm(smth_kind_) == smth_omg_).or.& + & ( (p%iprcparm(smth_kind_) == smth_biz_).and.& + & (p%iprcparm(coarse_mat_) == mat_repl_)) ) + + + if (ml_global_nmb) then + p%mlia(1:nrow) = p%mlia(1:nrow) + naggrm1 + call psb_halo(p%mlia,desc_a,info) + + if(info /= 0) then + call psb_errpush(4010,name,a_err='f90_pshalo') + goto 9999 + end if + end if + + if (aggr_dump) then + open(30+me) + write(30+me,*) '% Aggregation map' + do i=1,ncol + write(30+me,*) i,p%mlia(i) + end do + close(30+me) + end if + + ! naggr: number of local aggregates + ! nrow: local rows. + ! + allocate(p%dorig(nrow),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + ! Get diagonal D + call psb_sp_getdiag(a,p%dorig,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='sp_getdiag') + goto 9999 + end if + + do i=1,size(p%dorig) + if (p%dorig(i) /= zzero) then + p%dorig(i) = zone / p%dorig(i) + else + p%dorig(i) = zone + end if + end do + + ! where (p%dorig /= dzero) + ! p%dorig = done / p%dorig + ! elsewhere + ! p%dorig = done + ! end where + + + ! 1. Allocate Ptilde in sparse matrix form + am4%fida='COO' + am4%m=ncol + if (ml_global_nmb) then + am4%k=ntaggr + call psb_sp_all(ncol,ntaggr,am4,ncol,info) + else + am4%k=naggr + call psb_sp_all(ncol,naggr,am4,ncol,info) + endif + if(info /= 0) then + call psb_errpush(4010,name,a_err='spall') + goto 9999 + end if + + if (ml_global_nmb) then + do i=1,ncol + am4%aspk(i) = zone + am4%ia1(i) = i + am4%ia2(i) = p%mlia(i) + end do + am4%infoa(psb_nnz_) = ncol + else + do i=1,nrow + am4%aspk(i) = zone + am4%ia1(i) = i + am4%ia2(i) = p%mlia(i) + end do + am4%infoa(psb_nnz_) = nrow + endif + + + + + call psb_ipcoo2csr(am4,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='ipcoo2csr') + goto 9999 + end if + + call psb_sp_clone(a,am3,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spclone') + goto 9999 + end if + + ! + ! WARNING: the cycles below assume that AM3 does have + ! its diagonal elements stored explicitly!!! + ! Should we switch to something safer? + ! + call psb_sp_scal(am3,p%dorig,info) + if(info /= 0) goto 9999 + + if (p%iprcparm(om_choice_) == lib_choice_) then + + if (p%iprcparm(smth_kind_) == smth_biz_) then + + ! + ! This only works with CSR. + ! + anorm = dzero + dg = done + do i=1,am3%m + tmp = dzero + do j=am3%ia2(i),am3%ia2(i+1)-1 + if (am3%ia1(j) <= am3%m) then + tmp = tmp + abs(am3%aspk(j)) + endif + if (am3%ia1(j) == i ) then + dg = abs(am3%aspk(j)) + end if + end do + anorm = max(anorm,tmp/dg) + enddo + + call psb_amx(ictxt,anorm) + else + anorm = psb_spnrmi(am3,desc_a,info) + endif + omega = 4.d0/(3.d0*anorm) + p%dprcparm(smooth_omega_) = omega + + else if (p%iprcparm(om_choice_) == user_choice_) then + + omega = p%dprcparm(smooth_omega_) + + else if (p%iprcparm(om_choice_) /= user_choice_) then + write(0,*) me,'Error: invalid choice for OMEGA in blaggrmat?? ',& + & p%iprcparm(om_choice_) + end if + + + if (am3%fida=='CSR') then + do i=1,am3%m + do j=am3%ia2(i),am3%ia2(i+1)-1 + if (am3%ia1(j) == i) then + am3%aspk(j) = done - omega*am3%aspk(j) + else + am3%aspk(j) = - omega*am3%aspk(j) + end if + end do + end do + else if (am3%fida=='COO') then + do j=1,am3%infoa(psb_nnz_) + if (am3%ia1(j) /= am3%ia2(j)) then + am3%aspk(j) = - omega*am3%aspk(j) + else + am3%aspk(j) = done - omega*am3%aspk(j) + endif + end do + call psb_ipcoo2csr(am3,info) + else + write(0,*) 'Missing implementation of I sum' + call psb_errpush(4010,name) + goto 9999 + end if + + if (test_dump) then + open(30+me) + write(30+me,*) 'OMEGA: ',omega + do i=1,size(p%dorig) + write(30+me,*) p%dorig(i) + end do + close(30+me) + end if + + if (test_dump) call & + & psb_csprt(20+me,am4,head='% Operator Ptilde.',ivr=desc_a%loc_to_glob) + if (test_dump) call psb_csprt(40+me,am3,head='% (I-wDA)',ivr=desc_a%loc_to_glob,& + & ivc=desc_a%loc_to_glob) + if (debug) write(0,*) me,'Done gather, going for SYMBMM 1' + ! + ! Symbmm90 does the allocation for its result. + ! + ! am1 = (i-wDA)Ptilde + ! Doing it this way means to consider diag(Ai) + ! + ! + call psb_symbmm(am3,am4,am1,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='symbmm 1') + goto 9999 + end if + + call psb_numbmm(am3,am4,am1) + + if (debug) write(0,*) me,'Done NUMBMM 1' + + call psb_sp_free(am4,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='sp_free') + goto 9999 + end if + + if (ml_global_nmb) then + ! + ! Now we have to gather the halo of am1, and add it to itself + ! to multiply it by A, + ! + call psb_sphalo(am1,desc_a,am4,info,clcnv=.false.) + + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_sphalo') + goto 9999 + end if + + call psb_rwextd(ncol,am1,info,b=am4) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_rwextd') + goto 9999 + end if + + call psb_sp_free(am4,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_sp_free') + goto 9999 + end if + + else + + call psb_rwextd(ncol,am1,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='rwextd') + goto 9999 + end if + endif + + if (test_dump) & + & call psb_csprt(60+me,am1,head='% (I-wDA)Pt',ivr=desc_a%loc_to_glob) + + call psb_symbmm(a,am1,am3,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='symbmm 2') + goto 9999 + end if + + call psb_numbmm(a,am1,am3) + if (debug) write(0,*) me,'Done NUMBMM 2' + + if (p%iprcparm(smth_kind_) == smth_omg_) then + call psb_transc(am1,am2,fmt='COO') + nzl = am2%infoa(psb_nnz_) + i=0 + ! + ! Now we have to fix this. The only rows of B that are correct + ! are those corresponding to "local" aggregates, i.e. indices in p%mlia(:) + ! + do k=1, nzl + if ((naggrm1 < am2%ia1(k)) .and.(am2%ia1(k) <= naggrp1)) then + i = i+1 + am2%aspk(i) = am2%aspk(k) + am2%ia1(i) = am2%ia1(k) + am2%ia2(i) = am2%ia2(k) + end if + end do + + am2%infoa(psb_nnz_) = i + call psb_ipcoo2csr(am2,info) + else + call psb_transc(am1,am2) + endif + if (debug) write(0,*) me,'starting sphalo/ rwxtd' + + if (p%iprcparm(smth_kind_) == smth_omg_) then + ! am2 = ((i-wDA)Ptilde)^T + call psb_sphalo(am3,desc_a,am4,info,clcnv=.false.) + + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_sphalo') + goto 9999 + end if + call psb_rwextd(ncol,am3,info,b=am4) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_rwextd') + goto 9999 + end if + call psb_sp_free(am4,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_sp_free') + goto 9999 + end if + + else if (p%iprcparm(smth_kind_) == smth_biz_) then + + call psb_rwextd(ncol,am3,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_rwextd') + goto 9999 + end if + endif + + if (debug) write(0,*) me,'starting symbmm 3' + call psb_symbmm(am2,am3,b,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='symbmm 3') + goto 9999 + end if + + if (debug) write(0,*) me,'starting numbmm 3' + call psb_numbmm(am2,am3,b) + if (debug) write(0,*) me,'Done NUMBMM 3' + +!!$ if (aggr_dump) call csprt(50+me,am1,head='% Operator PTrans.') + call psb_sp_free(am3,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_sp_free') + goto 9999 + end if + + call psb_ipcsr2coo(b,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='ipcsr2coo') + goto 9999 + end if + + call psb_fixcoo(b,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='fixcoo') + goto 9999 + end if + + + if (test_dump) call psb_csprt(80+me,b,head='% Smoothed aggregate AC.') + + select case(p%iprcparm(smth_kind_)) + + case(smth_omg_) + + select case(p%iprcparm(coarse_mat_)) + + case(mat_distr_) + + call psb_sp_clone(b,ac,info) + if(info /= 0) goto 9999 + nzac = ac%infoa(psb_nnz_) + nzl = ac%infoa(psb_nnz_) + + allocate(ivall(ntaggr),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + i = 1 + do ip=1,np + do k=1, p%nlaggr(ip) + ivall(i) = ip + i = i + 1 + end do + end do + + call psb_cdall(ictxt,desc_ac,info,vg=ivall(1:ntaggr),flag=1) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_cdall') + goto 9999 + end if + + + call psb_cdins(nzl,ac%ia1,ac%ia2,desc_ac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_cdins') + goto 9999 + end if + + if (debug) write(0,*) me,'Created aux descr. distr.' + call psb_cdasb(desc_ac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_cdasb') + goto 9999 + end if + + + if (debug) write(0,*) me,'Asmbld aux descr. distr.' + + call psb_glob_to_loc(ac%ia1(1:nzl),desc_ac,info,iact='I') + if(info /= 0) then + call psb_errpush(4010,name,a_err='psglob_to_loc') + goto 9999 + end if + + + call psb_glob_to_loc(ac%ia2(1:nzl),desc_ac,info,iact='I') + if(info /= 0) then + call psb_errpush(4010,name,a_err='psglob_to_loc') + goto 9999 + end if + + + ac%m=desc_ac%matrix_data(psb_n_row_) + ac%k=desc_ac%matrix_data(psb_n_col_) + ac%fida='COO' + ac%descra='G' + + call psb_sp_free(b,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_sp_free') + goto 9999 + end if + + + deallocate(ivall,nzbr,idisp) + + ! Split AC=M+N N off-diagonal part + call psb_sp_all(ac%m,ac%k,p%av(ap_nd_),nzl,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_sp_all') + goto 9999 + end if + + k=0 + do i=1,nzl + if (ac%ia2(i)>ac%m) then + k = k + 1 + p%av(ap_nd_)%aspk(k) = ac%aspk(i) + p%av(ap_nd_)%ia1(k) = ac%ia1(i) + p%av(ap_nd_)%ia2(k) = ac%ia2(i) + endif + enddo + p%av(ap_nd_)%infoa(psb_nnz_) = k + call psb_ipcoo2csr(p%av(ap_nd_),info) + + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_ipcoo2csr') + goto 9999 + end if + call psb_sum(ictxt,k) + + if (k == 0) then + ! If the off diagonal part is emtpy, there's no point + ! in doing multiple Jacobi sweeps. This is certain + ! to happen when running on a single processor. + p%iprcparm(jac_sweeps_) = 1 + end if + + + if (np>1) then + nzl = psb_sp_get_nnzeros(am1) + call psb_glob_to_loc(am1%ia1(1:nzl),desc_ac,info,'I') + + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_glob_to_loc') + goto 9999 + end if + endif + am1%k=desc_ac%matrix_data(psb_n_col_) + + if (np>1) then + call psb_ipcsr2coo(am2,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_ipcsr2coo') + goto 9999 + end if + + nzl = am2%infoa(psb_nnz_) + call psb_glob_to_loc(am2%ia1(1:nzl),desc_ac,info,'I') + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_glob_to_loc') + goto 9999 + end if + + call psb_ipcoo2csr(am2,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_ipcoo2csr') + goto 9999 + end if + end if + am2%m=desc_ac%matrix_data(psb_n_col_) + + case(mat_repl_) + ! + ! + call psb_cdrep(ntaggr,ictxt,desc_ac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_cdrep') + goto 9999 + end if + + nzbr(:) = 0 + nzbr(me+1) = b%infoa(psb_nnz_) + + call psb_sum(ictxt,nzbr(1:np)) + nzac = sum(nzbr) + call psb_sp_all(ntaggr,ntaggr,ac,nzac,info) + if(info /= 0) goto 9999 + + + call psb_get_mpicomm(ictxt,icomm) + do ip=1,np + idisp(ip) = sum(nzbr(1:ip-1)) + enddo + ndx = nzbr(me+1) + + call mpi_allgatherv(b%aspk,ndx,mpi_double_complex,ac%aspk,nzbr,idisp,& + & mpi_double_complex,icomm,info) + call mpi_allgatherv(b%ia1,ndx,mpi_integer,ac%ia1,nzbr,idisp,& + & mpi_integer,icomm,info) + call mpi_allgatherv(b%ia2,ndx,mpi_integer,ac%ia2,nzbr,idisp,& + & mpi_integer,icomm,info) + if(info /= 0) goto 9999 + + + ac%m = ntaggr + ac%k = ntaggr + ac%infoa(psb_nnz_) = nzac + ac%fida='COO' + ac%descra='G' + call psb_fixcoo(ac,info) + if(info /= 0) goto 9999 + call psb_sp_free(b,info) + if(info /= 0) goto 9999 + if (me==0) then + if (test_dump) call psb_csprt(80+me,ac,head='% Smoothed aggregate AC.') + endif + + deallocate(nzbr,idisp) + + case default + write(0,*) 'Inconsistent input in smooth_new_aggregate' + end select + + + case(smth_biz_) + + select case(p%iprcparm(coarse_mat_)) + + case(mat_distr_) + + call psb_sp_clone(b,ac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spclone') + goto 9999 + end if + call psb_cdall(ictxt,desc_ac,info,nl=naggr) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_cdall') + goto 9999 + end if + call psb_cdasb(desc_ac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_cdasb') + goto 9999 + end if + call psb_sp_free(b,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='sp_free') + goto 9999 + end if + + + case(mat_repl_) + ! + ! + + call psb_cdrep(ntaggr,ictxt,desc_ac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_cdrep') + goto 9999 + end if + + nzbr(:) = 0 + nzbr(me+1) = b%infoa(psb_nnz_) + call psb_sum(ictxt,nzbr(1:np)) + nzac = sum(nzbr) + call psb_sp_all(ntaggr,ntaggr,ac,nzac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_sp_all') + goto 9999 + end if + + call psb_get_mpicomm(ictxt,icomm) + do ip=1,np + idisp(ip) = sum(nzbr(1:ip-1)) + enddo + ndx = nzbr(me+1) + + call mpi_allgatherv(b%aspk,ndx,mpi_double_complex,ac%aspk,nzbr,idisp,& + & mpi_double_complex,icomm,info) + call mpi_allgatherv(b%ia1,ndx,mpi_integer,ac%ia1,nzbr,idisp,& + & mpi_integer,icomm,info) + call mpi_allgatherv(b%ia2,ndx,mpi_integer,ac%ia2,nzbr,idisp,& + & mpi_integer,icomm,info) + if(info /= 0) then + info=-1 + call psb_errpush(info,name) + goto 9999 + end if + + + ac%m = ntaggr + ac%k = ntaggr + ac%infoa(psb_nnz_) = nzac + ac%fida='COO' + ac%descra='G' + call psb_fixcoo(ac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_fixcoo') + goto 9999 + end if + call psb_sp_free(b,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_sp_free') + goto 9999 + end if + + end select + deallocate(nzbr,idisp) + + end select + + call psb_ipcoo2csr(ac,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_ipcoo2csr') + goto 9999 + end if + + if (debug) write(0,*) me,'Done smooth_aggregate ' + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_errpush(info,name) + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + + + end subroutine smooth_aggregate + + + +end subroutine psb_zbldaggrmat diff --git a/psb_zdiagsc_bld.f90 b/psb_zdiagsc_bld.f90 new file mode 100644 index 00000000..b925954f --- /dev/null +++ b/psb_zdiagsc_bld.f90 @@ -0,0 +1,164 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_zdiagsc_bld(a,desc_a,p,upd,info) + + use psb_base_mod + use psb_prec_type + Implicit None + + type(psb_zspmat_type), target :: a + type(psb_desc_type), intent(in) :: desc_a + type(psb_zbaseprc_type),intent(inout) :: p + character, intent(in) :: upd + integer, intent(out) :: info + + + ! Local scalars + Integer :: err, n_row, n_col,I,j,k,ictxt,& + & me,np,mglob,lw, err_act + complex(kind(1.d0)),pointer :: gd(:), work(:) + integer :: int_err(5) + character :: iupd + + logical, parameter :: debug=.false. + integer,parameter :: iroot=0,iout=60,ilout=40 + character(len=20) :: name, ch_err + + if(psb_get_errstatus().ne.0) return + info=0 + err=0 + call psb_erractionsave(err_act) + name = 'psb_diagsc_bld' + + if (debug) write(0,*) 'Entering diagsc_bld' + info = 0 + int_err(1) = 0 + ictxt = psb_cd_get_context(desc_a) + n_row = psb_cd_get_local_rows(desc_a) + n_col = psb_cd_get_local_cols(desc_a) + mglob = psb_cd_get_global_rows(desc_a) + + if (debug) write(0,*) 'Preconditioner Blacs_gridinfo' + call psb_info(ictxt, me, np) + + if (debug) write(0,*) 'Precond: Diagonal scaling' + ! diagonal scaling + + call psb_realloc(n_col,p%d,info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='psb_realloc') + goto 9999 + end if + + call psb_csrws(p%d,a,info,trans='N') + if(info /= 0) then + info=4010 + ch_err='psb_csrws' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (debug) write(ilout+me,*) 'VDIAG ',n_row + do i=1,n_row + if (p%d(i) == zzero) then + p%d(i) = zone + else + p%d(i) = zone/p%d(i) + endif + + if (debug) write(ilout+me,*) i,desc_a%loc_to_glob(i), p%d(i) +!!$ if (p%d(i).lt.0.d0) then +!!$ write(0,*) me,'Negative RWS? ',i,p%d(i) +!!$ endif + end do + if (a%pl(1) /= 0) then + allocate(work(n_row),stat=info) + if (info /= 0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + end if + call psb_gelp('n',a%pl,p%d,desc_a,info) + if(info /= 0) then + info=4010 + ch_err='psb_zgelp' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + deallocate(work) + endif + + if (debug) then + allocate(gd(mglob),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + call psb_gather(gd, p%d, desc_a, info, iroot=iroot) + if(info /= 0) then + info=4010 + ch_err='psb_zgatherm' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (me.eq.iroot) then + write(iout+np,*) 'VDIAG CHECK ',mglob + do i=1,mglob + write(iout+np,*) i,gd(i) + enddo + endif + deallocate(gd) + endif + if (debug) write(*,*) 'Preconditioner DIAG computed OK' + + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_zdiagsc_bld + diff --git a/psb_zgenaggrmap.f90 b/psb_zgenaggrmap.f90 new file mode 100644 index 00000000..f89b00ca --- /dev/null +++ b/psb_zgenaggrmap.f90 @@ -0,0 +1,292 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_zgenaggrmap(aggr_type,a,desc_a,nlaggr,ilaggr,info) + use psb_base_mod + use psb_prec_type + implicit none + integer, intent(in) :: aggr_type + type(psb_zspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + integer, allocatable :: ilaggr(:),nlaggr(:) + integer, intent(out) :: info + ! Locals + integer, allocatable :: ils(:), neigh(:) + integer :: icnt,nlp,k,n,ia,isz,nr, naggr,i,j,m + + logical :: recovery + logical, parameter :: debug=.false. + integer ::ictxt,np,me,err_act + integer :: nrow, ncol, n_ne + integer, parameter :: one=1, two=2 + character(len=20) :: name, ch_err + + if(psb_get_errstatus().ne.0) return + info=0 + name = 'psb_bldaggrmat' + call psb_erractionsave(err_act) + ! + ! Note. At the time being we are ignoring aggr_type + ! so that we only have local decoupled aggregation. This might + ! change in the future. + ! + ictxt=psb_cd_get_context(desc_a) + call psb_info(ictxt,me,np) + nrow = psb_cd_get_local_rows(desc_a) + ncol = psb_cd_get_local_cols(desc_a) + + nr = a%m + allocate(ilaggr(nr),neigh(nr),stat=info) + if(info.ne.0) then + info=4000 + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + do i=1, nr + ilaggr(i) = -(nr+1) + end do + ! Note: -(nr+1) Untouched as yet + ! -i 1<=i<=nr Adjacent to aggregate i + ! i 1<=i<=nr Belonging to aggregate i + + ! + ! Phase one: group nodes together. + ! Very simple minded strategy. + ! + naggr = 0 + nlp = 0 + do + icnt = 0 + do i=1, nr + if (ilaggr(i) == -(nr+1)) then + ! + ! 1. Untouched nodes are marked >0 together + ! with their neighbours + ! + icnt = icnt + 1 + naggr = naggr + 1 + ilaggr(i) = naggr + + call psb_neigh(a,i,neigh,n_ne,info,lev=one) + if (info/=0) then + info=4010 + ch_err='psb_neigh' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + do k=1, n_ne + j = neigh(k) + if ((1<=j).and.(j<=nr)) then + ilaggr(j) = naggr +!!$ if (ilaggr(j) < 0) ilaggr(j) = naggr +!!$ if (ilaggr(j) == -(nr+1)) ilaggr(j) = naggr + endif + enddo + ! + ! 2. Untouched neighbours of these nodes are marked <0. + ! + call psb_neigh(a,i,neigh,n_ne,info,lev=two) + if (info/=0) then + info=4010 + ch_err='psb_neigh' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + do n = 1, n_ne + m = neigh(n) + if ((1<=m).and.(m<=nr)) then + if (ilaggr(m) == -(nr+1)) ilaggr(m) = -naggr + endif + enddo + endif + enddo + nlp = nlp + 1 + if (icnt == 0) exit + enddo + if (debug) then + write(0,*) 'Check 1:',count(ilaggr == -(nr+1)),(a%ia1(i),i=a%ia2(1),a%ia2(2)-1) + end if + + ! + ! Phase two: sweep over leftovers. + ! + allocate(ils(naggr+10),stat=info) + if(info.ne.0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + end if + + do i=1, size(ils) + ils(i) = 0 + end do + do i=1, nr + n = ilaggr(i) + if (n>0) then + if (n>naggr) then + write(0,*) 'loc_Aggregate: n > naggr 1 ? ',n,naggr + else + ils(n) = ils(n) + 1 + end if + + end if + end do + if (debug) then + write(0,*) 'Phase 1: number of aggregates ',naggr + write(0,*) 'Phase 1: nodes aggregated ',sum(ils) + end if + + recovery=.false. + do i=1, nr + if (ilaggr(i) < 0) then + ! + ! Now some silly rule to break ties: + ! Group with smallest adjacent aggregate. + ! + isz = nr+1 + ia = -1 + + call psb_neigh(a,i,neigh,n_ne,info,lev=one) + if (info/=0) then + info=4010 + ch_err='psb_neigh' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + do j=1, n_ne + k = neigh(j) + if ((1<=k).and.(k<=nr)) then + n = ilaggr(k) + if (n>0) then + if (n>naggr) then + write(0,*) 'loc_Aggregate: n > naggr 2? ',n,naggr + end if + + if (ils(n) < isz) then + ia = n + isz = ils(n) + endif + endif + endif + enddo + if (ia == -1) then + if (ilaggr(i) > -(nr+1)) then + ilaggr(i) = abs(ilaggr(i)) + if (ilaggr(I)>naggr) then + write(0,*) 'loc_Aggregate: n > naggr 3? ',ilaggr(i),naggr + end if + ils(ilaggr(i)) = ils(ilaggr(i)) + 1 + ! + ! This might happen if the pattern is non symmetric. + ! Need a better handling. + ! + recovery = .true. + else + write(0,*) 'Unrecoverable error !!',ilaggr(i), nr + endif + else + ilaggr(i) = ia + if (ia>naggr) then + write(0,*) 'loc_Aggregate: n > naggr 4? ',ia,naggr + end if + + ils(ia) = ils(ia) + 1 + endif + end if + enddo + if (recovery) then + write(0,*) 'Had to recover from strange situation in loc_aggregate.' + write(0,*) 'Perhaps an unsymmetric pattern?' + endif + if (debug) then + write(0,*) 'Phase 2: number of aggregates ',naggr + write(0,*) 'Phase 2: nodes aggregated ',sum(ils) + do i=1, naggr + write(*,*) 'Size of aggregate ',i,' :',count(ilaggr==i), ils(i) + enddo + write(*,*) maxval(ils(1:naggr)) + write(*,*) 'Leftovers ',count(ilaggr<0), ' in ',nlp,' loops' + end if + +!!$ write(0,*) 'desc_a loc_aggr 4 : ', desc_a%matrix_data(m_) + if (count(ilaggr<0) >0) then + write(0,*) 'Fatal error: some leftovers!!!' + endif + + deallocate(ils,neigh,stat=info) + if (info/=0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + end if + + if (nrow /= size(ilaggr)) then + write(0,*) 'SOmething wrong ilaggr ',nrow,size(ilaggr) + endif + call psb_realloc(ncol,ilaggr,info) + if (info/=0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocate(nlaggr(np),stat=info) + if (info/=0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + end if + + nlaggr(:) = 0 + nlaggr(me+1) = naggr + call psb_sum(ictxt,nlaggr(1:np)) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_zgenaggrmap diff --git a/psb_zilu_bld.f90 b/psb_zilu_bld.f90 new file mode 100644 index 00000000..47085381 --- /dev/null +++ b/psb_zilu_bld.f90 @@ -0,0 +1,364 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +!***************************************************************************** +!* * +!* This is where the action takes place. * +!* ASMATBLD does the setup: building the prec descriptor plus retrieving * +!* matrix rows if needed * +!* * +!* * +!* * +!* * +!* some open code does the renumbering * +!* * +!* * +!* * +!* * +!***************************************************************************** +subroutine psb_zilu_bld(a,desc_a,p,upd,info) + use psb_base_mod + use psb_prec_type + implicit none + ! + ! .. Scalar Arguments .. + integer, intent(out) :: info + ! .. array Arguments .. + type(psb_zspmat_type), intent(in), target :: a + type(psb_zbaseprc_type), intent(inout) :: p + type(psb_desc_type), intent(in) :: desc_a + character, intent(in) :: upd + + ! .. Local Scalars .. + integer :: i, j, jj, k, kk, m + integer :: int_err(5) + character :: trans, unitd + type(psb_zspmat_type) :: blck, atmp + real(kind(1.d0)) :: t1,t2,t3,t4,t5,t6,mpi_wtime, t7, t8 + external mpi_wtime + logical, parameter :: debugprt=.false., debug=.false., aggr_dump=.false. + integer nztota, nztotb, nztmp, nzl, nnr, ir, err_act,& + & n_row, nrow_a,n_col, nhalo, ind, iind, i1,i2,ia + integer :: ictxt,np,me + character(len=20) :: name, ch_err + + interface psb_ilu_fct + subroutine psb_zilu_fct(a,l,u,d,info,blck) + use psb_base_mod + use psb_prec_type + integer, intent(out) :: info + type(psb_zspmat_type),intent(in) :: a + type(psb_zspmat_type),intent(inout) :: l,u + type(psb_zspmat_type),intent(in), optional, target :: blck + complex(kind(1.d0)), intent(inout) :: d(:) + end subroutine psb_zilu_fct + end interface + + interface psb_asmatbld + Subroutine psb_zasmatbld(ptype,novr,a,blk,desc_data,upd,desc_p,info,outfmt) + use psb_base_mod + use psb_prec_type + integer, intent(in) :: ptype,novr + Type(psb_zspmat_type), Intent(in) :: a + Type(psb_zspmat_type), Intent(inout) :: blk + Type(psb_desc_type), Intent(inout) :: desc_p + Type(psb_desc_type), Intent(in) :: desc_data + Character, Intent(in) :: upd + integer, intent(out) :: info + character(len=5), optional :: outfmt + end Subroutine psb_zasmatbld + end interface + + interface psb_sp_renum + subroutine psb_zsp_renum(a,desc_a,blck,p,atmp,info) + use psb_base_mod + use psb_prec_type + type(psb_zspmat_type), intent(in) :: a,blck + type(psb_zspmat_type), intent(inout) :: atmp + type(psb_zbaseprc_type), intent(inout) :: p + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + end subroutine psb_zsp_renum + end interface + + if(psb_get_errstatus().ne.0) return + info=0 + name='psb_ilu_bld' + call psb_erractionsave(err_act) + + ictxt=psb_cd_get_context(desc_a) + call psb_info(ictxt, me, np) + + m = a%m + if (m < 0) then + info = 10 + int_err(1) = 1 + int_err(2) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + trans = 'N' + unitd = 'U' + if (p%iprcparm(n_ovr_) < 0) then + info = 11 + int_err(1) = 1 + int_err(2) = p%iprcparm(n_ovr_) + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + call psb_nullify_sp(blck) + call psb_nullify_sp(atmp) + + t1= mpi_wtime() + + if(debug) write(0,*)me,': calling psb_asmatbld',p%iprcparm(p_type_),p%iprcparm(n_ovr_) + if (debug) call psb_barrier(ictxt) + call psb_asmatbld(p%iprcparm(p_type_),p%iprcparm(n_ovr_),a,& + & blck,desc_a,upd,p%desc_data,info) + if(info/=0) then + info=4010 + ch_err='psb_asmatbld' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + t2= mpi_wtime() + if (debug) write(0,*)me,': out of psb_asmatbld' + if (debug) call psb_barrier(ictxt) + + if (allocated(p%av)) then + if (size(p%av) < bp_ilu_avsz) then + call psb_errpush(4010,name,a_err='Insufficient av size') + goto 9999 + endif + else + call psb_errpush(4010,name,a_err='AV not associated') + goto 9999 + endif + + nrow_a = psb_cd_get_local_rows(desc_a) + nztota = psb_sp_get_nnzeros(a) + nztotb = psb_sp_get_nnzeros(blck) + if (debug) write(0,*)me,': out get_nnzeros',nztota + if (debug) call psb_barrier(ictxt) + + n_col = psb_cd_get_local_cols(desc_a) + nhalo = n_col-nrow_a + n_row = p%desc_data%matrix_data(psb_n_row_) + p%av(l_pr_)%m = n_row + p%av(l_pr_)%k = n_row + p%av(u_pr_)%m = n_row + p%av(u_pr_)%k = n_row + call psb_sp_all(n_row,n_row,p%av(l_pr_),nztota+nztotb,info) + if (info == 0) call psb_sp_all(n_row,n_row,p%av(u_pr_),nztota+nztotb,info) + if(info/=0) then + info=4010 + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (allocated(p%d)) then + if (size(p%d) < n_row) then + deallocate(p%d) + endif + endif + if (.not.allocated(p%d)) then + allocate(p%d(n_row),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + endif + + + if (debug) then + write(0,*) me,'Done psb_asmatbld' + call psb_barrier(ictxt) + endif + + + if (p%iprcparm(iren_) > 0) then + + ! + ! Here we allocate a full copy to hold local A and received BLK + ! + + nztota = psb_sp_get_nnzeros(a) + nztotb = psb_sp_get_nnzeros(blck) + + call psb_sp_all(atmp,nztota+nztotb,info) + if(info/=0) then + info=4011 + call psb_errpush(info,name) + goto 9999 + end if + + + ! write(0,*) 'ILU_BLD ',nztota,nztotb,a%m + + call psb_sp_renum(a,desc_a,blck,p,atmp,info) + + if(info/=0) then + info=4010 + ch_err='psb_sp_renum' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + t3 = mpi_wtime() + if (debugprt) then + call psb_barrier(ictxt) + open(40+me) + call psb_csprt(40+me,atmp,head='% Local matrix') + close(40+me) + endif + if (debug) write(0,*) me,' Factoring rows ',& + &atmp%m,a%m,blck%m,atmp%ia2(atmp%m+1)-1 + + ! + ! Ok, factor the matrix. + ! + t5 = mpi_wtime() + blck%m=0 + call psb_ilu_fct(atmp,p%av(l_pr_),p%av(u_pr_),p%d,info,blck=blck) + if(info/=0) then + info=4010 + ch_err='psb_ilu_fct' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + call psb_sp_free(atmp,info) + if(info/=0) then + info=4010 + ch_err='psb_sp_free' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + + else if (p%iprcparm(iren_) == 0) then + t3 = mpi_wtime() + ! This is where we have mo renumbering, thus no need + ! for ATMP + + if (debugprt) then + open(40+me) + call psb_barrier(ictxt) + call psb_csprt(40+me,a,iv=p%desc_data%loc_to_glob,& + & head='% Local matrix') + if (p%iprcparm(p_type_)==asm_) then + call psb_csprt(40+me,blck,iv=p%desc_data%loc_to_glob,& + & irs=a%m,head='% Received rows') + endif + close(40+me) + endif + + t5= mpi_wtime() + if (debug) write(0,*) me,' Going for ilu_fct' + if (debug) call psb_barrier(ictxt) + call psb_ilu_fct(a,p%av(l_pr_),p%av(u_pr_),p%d,info,blck=blck) + if(info/=0) then + info=4010 + ch_err='psb_ilu_fct' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (debug) write(0,*) me,' Done dilu_fct' + endif + + + if (debugprt) then + ! + ! Print out the factors on file. + ! + open(80+me) + + call psb_csprt(80+me,p%av(l_pr_),head='% Local L factor') + write(80+me,*) '% Diagonal: ',p%av(l_pr_)%m + do i=1,p%av(l_pr_)%m + write(80+me,*) i,i,p%d(i) + enddo + call psb_csprt(80+me,p%av(u_pr_),head='% Local U factor') + + close(80+me) + endif + + + ! ierr = MPE_Log_event( ifcte, 0, "st SIMPLE" ) + t6 = mpi_wtime() + ! + ! write(0,'(i3,1x,a,3(1x,g18.9))') me,'renum/factor time',t3-t2,t6-t5 + ! if (me==0) write(0,'(a,3(1x,g18.9))') 'renum/factor time',t3-t2,t6-t5 + + call psb_sp_free(blck,info) + if(info/=0) then + info=4010 + ch_err='psb_sp_free' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (psb_sp_getifld(psb_upd_,p%av(u_pr_),info) /= psb_upd_perm_) then + call psb_sp_trimsize(p%av(u_pr_),i1,i2,ia,info) + if (info == 0) call psb_sp_reall(p%av(u_pr_),i1,i2,ia,info) + endif + + if (psb_sp_getifld(psb_upd_,p%av(l_pr_),info) /= psb_upd_perm_) then + call psb_sp_trimsize(p%av(l_pr_),i1,i2,ia,info) + if (info == 0) call psb_sp_reall(p%av(l_pr_),i1,i2,ia,info) + endif + + + if (debug) write(0,*) me,'End of ilu_bld' + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + + +end subroutine psb_zilu_bld + + diff --git a/psb_zilu_fct.f90 b/psb_zilu_fct.f90 new file mode 100644 index 00000000..22c96ab8 --- /dev/null +++ b/psb_zilu_fct.f90 @@ -0,0 +1,472 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_zilu_fct(a,l,u,d,info,blck) + + ! + ! This routine copies and factors "on the fly" from A and BLCK + ! into L/D/U. + ! + ! + use psb_base_mod + use psb_prec_type + implicit none + ! .. Scalar Arguments .. + integer, intent(out) :: info + ! .. Array Arguments .. + type(psb_zspmat_type),intent(in) :: a + type(psb_zspmat_type),intent(inout) :: l,u + type(psb_zspmat_type),intent(in), optional, target :: blck + complex(kind(1.d0)), intent(inout) :: d(:) + ! .. Local Scalars .. + integer :: i, j, jj, k, kk, l1, l2, ll, low1, low2,m,ma,err_act + type(psb_zspmat_type), pointer :: blck_ + character(len=20) :: name, ch_err + name='psb_zcsrlu' + info = 0 + call psb_erractionsave(err_act) + ! .. Executable Statements .. + ! + + if (present(blck)) then + blck_ => blck + else + allocate(blck_,stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + call psb_nullify_sp(blck_) ! Why do we need this? Who knows.... + call psb_sp_all(0,0,blck_,1,info) + if(info.ne.0) then + info=4010 + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + blck_%m=0 + endif + + call psb_zilu_fctint(m,a%m,a,blck_%m,blck_,& + & d,l%aspk,l%ia1,l%ia2,u%aspk,u%ia1,u%ia2,l1,l2,info) + if(info.ne.0) then + info=4010 + ch_err='psb_zilu_fctint' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + l%infoa(1) = l1 + l%fida = 'CSR' + l%descra = 'TLU' + u%infoa(1) = l2 + u%fida = 'CSR' + u%descra = 'TUU' + l%m = m + l%k = m + u%m = m + u%k = m + if (present(blck)) then + blck_ => null() + else + call psb_sp_free(blck_,info) + if(info.ne.0) then + info=4010 + ch_err='psb_sp_free' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + deallocate(blck_) + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +contains + subroutine psb_zilu_fctint(m,ma,a,mb,b,& + & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) + implicit none + + type(psb_zspmat_type) :: a,b + integer :: m,ma,mb,l1,l2,info + integer, dimension(*) :: lia1,lia2,uia1,uia2 + complex(kind(1.d0)), dimension(*) :: laspk,uaspk,d + + integer :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act + complex(kind(1.d0)) :: dia,temp + integer, parameter :: nrb=16 + logical,parameter :: debug=.false. + type(psb_zspmat_type) :: trw + integer :: int_err(5) + character(len=20) :: name, ch_err + + name='psb_zilu_fctint' + if(psb_get_errstatus().ne.0) return + info=0 + call psb_erractionsave(err_act) + call psb_nullify_sp(trw) + trw%m=0 + trw%k=0 + if(debug) write(0,*)'LUINT Allocating TRW' + call psb_sp_all(trw,1,info) + if(info.ne.0) then + info=4010 + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if(debug) write(0,*)'LUINT Done Allocating TRW' + lia2(1) = 1 + uia2(1) = 1 + l1=0 + l2=0 + m = ma+mb + if(debug) write(0,*)'In DCSRLU Begin cycle',m,ma,mb + + do i = 1, ma + if(debug) write(0,*)'LUINT: Loop index ',i,ma + d(i) = zzero + + ! + ! Here we take a fast shortcut if possible, otherwise + ! use spgtblk, slower but able (in principle) to handle + ! anything. + ! + if (a%fida=='CSR') then + do j = a%ia2(i), a%ia2(i+1) - 1 + k = a%ia1(j) + ! write(0,*)'KKKKK',k + if ((k < i).and.(k >= 1)) then + l1 = l1 + 1 + laspk(l1) = a%aspk(j) + lia1(l1) = k + else if (k == i) then + d(i) = a%aspk(j) + else if ((k > i).and.(k <= m)) then + l2 = l2 + 1 + uaspk(l2) = a%aspk(j) + uia1(l2) = k + end if + enddo + + else + + if ((mod(i,nrb) == 1).or.(nrb==1)) then + irb = min(ma-i+1,nrb) + call psb_sp_getblk(i,a,trw,info,lrw=i+irb-1) + if(info.ne.0) then + info=4010 + ch_err='psb_sp_getblk' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + ktrw=1 + end if + + do + if (ktrw > trw%infoa(psb_nnz_)) exit + if (trw%ia1(ktrw) > i) exit + k = trw%ia2(ktrw) + if ((k < i).and.(k >= 1)) then + l1 = l1 + 1 + laspk(l1) = trw%aspk(ktrw) + lia1(l1) = k + else if (k == i) then + d(i) = trw%aspk(ktrw) + else if ((k > i).and.(k <= m)) then + l2 = l2 + 1 + uaspk(l2) = trw%aspk(ktrw) + uia1(l2) = k + end if + ktrw = ktrw + 1 + enddo + + end if + +!!$ + + lia2(i+1) = l1 + 1 + uia2(i+1) = l2 + 1 + + dia = d(i) + do kk = lia2(i), lia2(i+1) - 1 + ! + ! compute element alo(i,k) of incomplete factorization + ! + temp = laspk(kk) + k = lia1(kk) + laspk(kk) = temp*d(k) + ! update the rest of row i using alo(i,k) + low1 = kk + 1 + low2 = uia2(i) + updateloop: do jj = uia2(k), uia2(k+1) - 1 + j = uia1(jj) + ! + if (j < i) then + ! search alo(i,*) for matching index J + do ll = low1, lia2(i+1) - 1 + l = lia1(ll) + if (l > j) then + low1 = ll + exit + else if (l == j) then + laspk(ll) = laspk(ll) - temp*uaspk(jj) + low1 = ll + 1 + cycle updateloop + end if + enddo + ! + else if (j == i) then + ! j=i update diagonal + ! write(0,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) + dia = dia - temp*uaspk(jj) + ! write(0,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) + cycle updateloop + ! + else if (j > i) then + ! search aup(i,*) for matching index j + do ll = low2, uia2(i+1) - 1 + l = uia1(ll) + if (l > j) then + low2 = ll + exit + else if (l == j) then + uaspk(ll) = uaspk(ll) - temp*uaspk(jj) + low2 = ll + 1 + cycle updateloop + end if + enddo + end if + ! + ! for milu al=1.; for ilu al=0. + ! al = 1.d0 + ! dia = dia - al*temp*aup(jj) + enddo updateloop + enddo + ! + ! + ! Non singularity + ! + if (abs(dia) < epstol) then + ! + ! Pivot too small: unstable factorization + ! + info = 2 + int_err(1) = i + write(ch_err,'(g20.10)') abs(dia) + call psb_errpush(info,name,i_err=int_err,a_err=ch_err) + goto 9999 + else + dia = zone/dia + end if + d(i) = dia + ! write(6,*)'diag(',i,')=',d(i) + ! Scale row i of upper triangle + do kk = uia2(i), uia2(i+1) - 1 + uaspk(kk) = uaspk(kk)*dia + enddo + enddo + + do i = ma+1, m + d(i) = zzero + + + if (b%fida=='CSR') then + + do j = b%ia2(i-ma), b%ia2(i-ma+1) - 1 + k = b%ia1(j) + ! if (me.eq.2) write(0,*)'ecco k=',k + if ((k < i).and.(k >= 1)) then + l1 = l1 + 1 + laspk(l1) = b%aspk(j) + lia1(l1) = k + ! if(me.eq.2) write(0,*)'scrivo l' + else if (k == i) then + d(i) = b%aspk(j) + else if ((k > i).and.(k <= m)) then + l2 = l2 + 1 + uaspk(l2) = b%aspk(j) + ! write(0,*)'KKKKK',k + uia1(l2) = k + end if + enddo + + else + + if ((mod((i-ma),nrb) == 1).or.(nrb==1)) then + irb = min(m-i+1,nrb) + call psb_sp_getblk(i-ma,b,trw,info,lrw=i-ma+irb-1) + if(info.ne.0) then + info=4010 + ch_err='psb_sp_getblk' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + ktrw=1 + end if + + do + if (ktrw > trw%infoa(psb_nnz_)) exit + if (trw%ia1(ktrw) > i) exit + k = trw%ia2(ktrw) + ! write(0,*)'KKKKK',k + if ((k < i).and.(k >= 1)) then + l1 = l1 + 1 + laspk(l1) = trw%aspk(ktrw) + lia1(l1) = k + else if (k == i) then + d(i) = trw%aspk(ktrw) + else if ((k > i).and.(k <= m)) then + l2 = l2 + 1 + uaspk(l2) = trw%aspk(ktrw) + uia1(l2) = k + end if + ktrw = ktrw + 1 + enddo + + endif + + + lia2(i+1) = l1 + 1 + uia2(i+1) = l2 + 1 + + dia = d(i) + do kk = lia2(i), lia2(i+1) - 1 + ! + ! compute element alo(i,k) of incomplete factorization + ! + temp = laspk(kk) + k = lia1(kk) + laspk(kk) = temp*d(k) + ! update the rest of row i using alo(i,k) + low1 = kk + 1 + low2 = uia2(i) + updateloopb: do jj = uia2(k), uia2(k+1) - 1 + j = uia1(jj) + ! + if (j < i) then + ! search alo(i,*) for matching index J + do ll = low1, lia2(i+1) - 1 + l = lia1(ll) + if (l > j) then + low1 = ll + exit + else if (l == j) then + laspk(ll) = laspk(ll) - temp*uaspk(jj) + low1 = ll + 1 + cycle updateloopb + end if + enddo + ! + else if (j == i) then + ! j=i update diagonal + dia = dia - temp*uaspk(jj) + cycle updateloopb + ! + else if (j > i) then + ! search aup(i,*) for matching index j + do ll = low2, uia2(i+1) - 1 + l = uia1(ll) + if (l > j) then + low2 = ll + exit + else if (l == j) then + uaspk(ll) = uaspk(ll) - temp*uaspk(jj) + low2 = ll + 1 + cycle updateloopb + end if + enddo + end if + ! + ! for milu al=1.; for ilu al=0. + ! al = 1.d0 + ! dia = dia - al*temp*aup(jj) + enddo updateloopb + enddo + ! + ! + ! Non singularity + ! + if (abs(dia) < epstol) then + ! + ! Pivot too small: unstable factorization + ! + int_err(1) = i + write(ch_err,'(g20.10)') abs(dia) + info = 2 + call psb_errpush(info,name,i_err=int_err,a_err=ch_err) + goto 9999 + else + dia = zone/dia + end if + d(i) = dia + ! Scale row i of upper triangle + do kk = uia2(i), uia2(i+1) - 1 + uaspk(kk) = uaspk(kk)*dia + enddo + enddo + + call psb_sp_free(trw,info) + if(info.ne.0) then + info=4010 + ch_err='psb_sp_free' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if(debug) write(0,*)'Leaving ilu_fct' + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + end subroutine psb_zilu_fctint +end subroutine psb_zilu_fct diff --git a/psb_zmlprc_aply.f90 b/psb_zmlprc_aply.f90 new file mode 100644 index 00000000..d55bf707 --- /dev/null +++ b/psb_zmlprc_aply.f90 @@ -0,0 +1,779 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_zmlprc_aply(alpha,baseprecv,x,beta,y,desc_data,trans,work,info) + ! + ! Compute Y <- beta*Y + alpha*K^-1 X + ! where K is a multilevel preconditioner stored in baseprecv + ! + ! cfr.: Smith, Biorstad & Gropp + ! Domain Decomposition + ! Cambridge Univ. Press + ! + ! To each level I there corresponds a matrix A(I) and a preconditioner K(I) + ! + ! A notational difference: in the DD reference above the preconditioner for + ! a given level K(I) is written out as a sum over the subdomains + ! + ! SUM_k(R_k^T A_k R_k) + ! + ! whereas in this code the sum is implicit in the parallelization, + ! i.e. each process takes care of one subdomain, and for each level we have + ! as many subdomains as there are processes (except for the coarsest level where + ! we might have a replicated index space). Thus the sum apparently disappears + ! from our code, but only apparently, because it is implicit in the call + ! to psb_baseprc_aply. + ! + ! A bit of description of the baseprecv(:) data structure: + ! 1. Number of levels = NLEV = size(baseprecv(:)) + ! 2. baseprecv(ilev)%av(:) sparse matrices needed for the current level. + ! Includes: + ! 2.1.: baseprecv(ilev)%av(l_pr_) L factor of ILU preconditioners + ! 2.2.: baseprecv(ilev)%av(u_pr_) U factor of ILU preconditioners + ! 2.3.: baseprecv(ilev)%av(ap_nd_) Off-diagonal part of A for Jacobi sweeps + ! 2.4.: baseprecv(ilev)%av(ac_) Aggregated matrix of level ILEV + ! 2.5.: baseprecv(ilev)%av(sm_pr_t_) Smoother prolongator transpose; maps vectors + ! (ilev-1) ---> (ilev) + ! 2.6.: baseprecv(ilev)%av(sm_pr_) Smoother prolongator; maps vectors + ! (ilev) ---> (ilev-1) + ! Shouldn't we keep just one of them and handle transpose in the sparse BLAS? maybe + ! + ! 3. baseprecv(ilev)%desc_data comm descriptor for level ILEV + ! 4. baseprecv(ilev)%base_a Pointer (really a pointer!) to the base matrix + ! of the current level, i.e.: if ILEV=1 then A + ! else the aggregated matrix av(ac_); so we have + ! a unified treatment of residuals. Need this to + ! avoid passing explicitly matrix A to the + ! outer prec. routine + ! 5. baseprecv(ilev)%mlia The aggregation map from (ilev-1)-->(ilev) + ! if no smoother, it is used instead of sm_pr_ + ! 6. baseprecv(ilev)%nlaggr Number of aggregates on the various procs. + ! + + use psb_base_mod + use psb_prec_type + implicit none + + type(psb_desc_type),intent(in) :: desc_data + type(psb_zbaseprc_type), intent(in) :: baseprecv(:) + complex(kind(1.d0)),intent(in) :: alpha,beta + complex(kind(1.d0)),intent(inout) :: x(:), y(:) + character :: trans + complex(kind(1.d0)),target :: work(:) + integer, intent(out) :: info + + + ! Local variables + integer :: n_row,n_col + character ::diagl, diagu + integer :: ictxt,np,me,i, isz, nrg,nr2l,err_act, iptype, int_err(5) + real(kind(1.d0)) :: omega + real(kind(1.d0)) :: t1, t2, t3, t4, t5, t6, t7, mpi_wtime + logical, parameter :: debug=.false., debugprt=.false. + integer :: ismth, nlev, ilev + external mpi_wtime + character(len=20) :: name, ch_err + + type psb_mlprec_wrk_type + complex(kind(1.d0)), allocatable :: tx(:),ty(:),x2l(:),y2l(:) + end type psb_mlprec_wrk_type + type(psb_mlprec_wrk_type), allocatable :: mlprec_wrk(:) + + interface psb_baseprc_aply + subroutine psb_zbaseprc_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) + use psb_base_mod + use psb_prec_type + type(psb_desc_type),intent(in) :: desc_data + type(psb_zbaseprc_type), intent(in) :: prec + complex(kind(1.d0)),intent(inout) :: x(:), y(:) + complex(kind(1.d0)),intent(in) :: alpha,beta + character(len=1) :: trans + complex(kind(1.d0)),target :: work(:) + integer, intent(out) :: info + end subroutine psb_zbaseprc_aply + end interface + + name='psb_mlprc_aply' + info = 0 + call psb_erractionsave(err_act) + + + ictxt=desc_data%matrix_data(psb_ctxt_) + call psb_info(ictxt, me, np) + + nlev = size(baseprecv) + allocate(mlprec_wrk(nlev),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + + select case(baseprecv(2)%iprcparm(ml_type_)) + + case(no_ml_) + ! Should not really get here. + call psb_errpush(4010,name,a_err='no_ml_ in mlprc_aply?') + goto 9999 + + + case(add_ml_prec_) + + + ! + ! Additive is very simple. + ! 1. X(1) = Xext + ! 2. DO ILEV=2,NLEV + ! X(ILEV) = AV(PR_SM_T_)*X(ILEV-1) + ! Y(ILEV) = (K(ILEV)**(-1))*X(ILEV) + ! 3. DO ILEV=NLEV-1,1,-1 + ! Y(ILEV) = AV(PR_SM_)*Y(ILEV+1) + ! 4. Yext = beta*Yext + alpha*Y(1) + ! + ! Note: level numbering reversed wrt ref. DD, i.e. + ! 1..NLEV <=> (j) <-> 0 + + + call psb_baseprc_aply(alpha,baseprecv(1),x,beta,y,& + & baseprecv(1)%base_desc,trans,work,info) + if(info /=0) goto 9999 + allocate(mlprec_wrk(1)%x2l(size(x)),mlprec_wrk(1)%y2l(size(y))) + mlprec_wrk(1)%x2l(:) = x(:) + + + do ilev = 2, nlev + n_row = baseprecv(ilev-1)%base_desc%matrix_data(psb_n_row_) + n_col = baseprecv(ilev-1)%desc_data%matrix_data(psb_n_col_) + nr2l = baseprecv(ilev)%desc_data%matrix_data(psb_n_col_) + nrg = baseprecv(ilev)%desc_data%matrix_data(psb_n_row_) + allocate(mlprec_wrk(ilev)%x2l(nr2l),mlprec_wrk(ilev)%y2l(nr2l),& + & mlprec_wrk(ilev)%tx(max(n_row,n_col)),& + & mlprec_wrk(ilev)%ty(max(n_row,n_col)), stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + mlprec_wrk(ilev)%x2l(:) = zzero + mlprec_wrk(ilev)%y2l(:) = zzero + mlprec_wrk(ilev)%tx(1:n_row) = mlprec_wrk(ilev-1)%x2l(1:n_row) + mlprec_wrk(ilev)%tx(n_row+1:max(n_row,n_col)) = zzero + mlprec_wrk(ilev)%ty(:) = zzero + + ismth=baseprecv(ilev)%iprcparm(smth_kind_) + + if (ismth /= no_smth_) then + ! + ! Smoothed aggregation + ! + + + if (baseprecv(ilev)%iprcparm(glb_smth_) >0) then + call psb_halo(mlprec_wrk(ilev-1)%x2l,baseprecv(ilev-1)%base_desc,& + & info,work=work) + if(info /=0) goto 9999 + else + mlprec_wrk(ilev-1)%x2l(n_row+1:max(n_row,n_col)) = zzero + end if + + call psb_csmm(zone,baseprecv(ilev)%av(sm_pr_t_),mlprec_wrk(ilev-1)%x2l,& + & zzero,mlprec_wrk(ilev)%x2l,info) + if(info /=0) goto 9999 + + else + ! + ! Raw aggregation, may take shortcut + ! + do i=1,n_row + mlprec_wrk(ilev)%x2l(baseprecv(ilev)%mlia(i)) = & + & mlprec_wrk(ilev)%x2l(baseprecv(ilev)%mlia(i)) + & + & mlprec_wrk(ilev-1)%x2l(i) + end do + + end if + + if (baseprecv(ilev)%iprcparm(coarse_mat_)==mat_repl_) Then + call psb_sum(ictxt,mlprec_wrk(ilev)%x2l(1:nrg)) + else if (baseprecv(ilev)%iprcparm(coarse_mat_) /= mat_distr_) Then + write(0,*) 'Unknown value for baseprecv(2)%iprcparm(coarse_mat_) ',& + & baseprecv(ilev)%iprcparm(coarse_mat_) + endif + + call psb_baseprc_aply(zone,baseprecv(ilev),& + & mlprec_wrk(ilev)%x2l,zzero,mlprec_wrk(ilev)%y2l,& + & baseprecv(ilev)%desc_data, 'N',work,info) + + enddo + + do ilev =nlev,2,-1 + + ismth=baseprecv(ilev)%iprcparm(smth_kind_) + n_row = baseprecv(ilev-1)%base_desc%matrix_data(psb_n_row_) + n_col = baseprecv(ilev-1)%desc_data%matrix_data(psb_n_col_) + nr2l = baseprecv(ilev)%desc_data%matrix_data(psb_n_col_) + nrg = baseprecv(ilev)%desc_data%matrix_data(psb_n_row_) + + if (ismth /= no_smth_) then + + call psb_csmm(zone,baseprecv(ilev)%av(sm_pr_),mlprec_wrk(ilev)%y2l,& + & zone,mlprec_wrk(ilev-1)%y2l,info) + if(info /=0) goto 9999 + + else + + do i=1, n_row + mlprec_wrk(ilev-1)%y2l(i) = mlprec_wrk(ilev-1)%y2l(i) + & + & mlprec_wrk(ilev)%y2l(baseprecv(ilev)%mlia(i)) + enddo + + end if + end do + + call psb_geaxpby(alpha,mlprec_wrk(1)%y2l,zone,y,baseprecv(1)%base_desc,info) + if(info /=0) goto 9999 + + + case(mult_ml_prec_) + + ! + ! Multiplicative multilevel + ! Pre/post smoothing versions. + ! + + select case(baseprecv(2)%iprcparm(smth_pos_)) + + case(post_smooth_) + + ! + ! Post smoothing. + ! 1. X(1) = Xext + ! 2. DO ILEV=2, NLEV :: X(ILEV) = AV(PR_SM_T_,ILEV)*X(ILEV-1) + ! 3. Y(NLEV) = (K(NLEV)**(-1))*X(NLEV) + ! 4. DO ILEV=NLEV-1,1,-1 + ! Y(ILEV) = AV(PR_SM_,ILEV+1)*Y(ILEV+1) + ! Y(ILEV) = Y(ILEV) + (K(ILEV)**(-1))*(X(ILEV)-A(ILEV)*Y(ILEV)) + ! + ! 5. Yext = beta*Yext + alpha*Y(1) + ! + ! Note: level numbering reversed wrt ref. DD, i.e. + ! 1..NLEV <=> (j) <-> 0 + ! + ! Also: post smoothing in the ref. DD is only presented for NLEV=2. + ! + ! + + n_col = desc_data%matrix_data(psb_n_col_) + nr2l = baseprecv(1)%desc_data%matrix_data(psb_n_col_) + + allocate(mlprec_wrk(1)%x2l(nr2l),mlprec_wrk(1)%y2l(nr2l), & + & mlprec_wrk(1)%tx(nr2l), stat=info) + mlprec_wrk(1)%x2l(:) = zzero + mlprec_wrk(1)%y2l(:) = zzero + mlprec_wrk(1)%tx(:) = zzero + + call psb_geaxpby(zone,x,zzero,mlprec_wrk(1)%tx,& + & baseprecv(1)%base_desc,info) + call psb_geaxpby(zone,x,zzero,mlprec_wrk(1)%x2l,& + & baseprecv(1)%base_desc,info) + + do ilev=2, nlev + n_row = baseprecv(ilev-1)%base_desc%matrix_data(psb_n_row_) + n_col = baseprecv(ilev-1)%desc_data%matrix_data(psb_n_col_) + nr2l = baseprecv(ilev)%desc_data%matrix_data(psb_n_col_) + nrg = baseprecv(ilev)%desc_data%matrix_data(psb_n_row_) + ismth = baseprecv(ilev)%iprcparm(smth_kind_) + + allocate(mlprec_wrk(ilev)%tx(nr2l),mlprec_wrk(ilev)%y2l(nr2l),& + & mlprec_wrk(ilev)%x2l(nr2l), stat=info) + + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + mlprec_wrk(ilev)%x2l(:) = zzero + mlprec_wrk(ilev)%y2l(:) = zzero + mlprec_wrk(ilev)%tx(:) = zzero + if (ismth /= no_smth_) then + ! + ! Smoothed aggregation + ! + if (baseprecv(ilev)%iprcparm(glb_smth_) >0) then + call psb_halo(mlprec_wrk(ilev-1)%x2l,& + & baseprecv(ilev-1)%base_desc,info,work=work) + if(info /=0) goto 9999 + else + mlprec_wrk(ilev-1)%x2l(n_row+1:max(n_row,n_col)) = zzero + end if + + call psb_csmm(zone,baseprecv(ilev)%av(sm_pr_t_),mlprec_wrk(ilev-1)%x2l, & + & zzero,mlprec_wrk(ilev)%x2l,info) + if(info /=0) goto 9999 + + else + ! + ! Raw aggregation, may take shortcut + ! + do i=1,n_row + mlprec_wrk(ilev)%x2l(baseprecv(ilev)%mlia(i)) = & + & mlprec_wrk(ilev)%x2l(baseprecv(ilev)%mlia(i)) + & + & mlprec_wrk(ilev-1)%x2l(i) + end do + end if + + if (baseprecv(ilev)%iprcparm(coarse_mat_)==mat_repl_) Then + call psb_sum(ictxt,mlprec_wrk(ilev)%x2l(1:nrg)) + else if (baseprecv(ilev)%iprcparm(coarse_mat_) /= mat_distr_) Then + write(0,*) 'Unknown value for baseprecv(2)%iprcparm(coarse_mat_) ',& + & baseprecv(ilev)%iprcparm(coarse_mat_) + endif + call psb_geaxpby(zone,mlprec_wrk(ilev)%x2l,zzero,mlprec_wrk(ilev)%tx,& + & baseprecv(ilev)%base_desc,info) + if(info /=0) goto 9999 + + enddo + + + call psb_baseprc_aply(zone,baseprecv(nlev),mlprec_wrk(nlev)%x2l, & + & zzero, mlprec_wrk(nlev)%y2l,baseprecv(nlev)%desc_data,'N',work,info) + + if(info /=0) goto 9999 + + + do ilev=nlev-1, 1, -1 + ismth = baseprecv(ilev+1)%iprcparm(smth_kind_) + if (ismth /= no_smth_) then + if (ismth == smth_omg_) & + & call psb_halo(mlprec_wrk(ilev+1)%y2l,baseprecv(ilev+1)%desc_data,& + & info,work=work) + call psb_csmm(zone,baseprecv(ilev+1)%av(sm_pr_),mlprec_wrk(ilev+1)%y2l,& + & zzero,mlprec_wrk(ilev)%y2l,info) + if(info /=0) goto 9999 + + else + n_row = baseprecv(ilev)%base_desc%matrix_data(psb_n_row_) + mlprec_wrk(ilev)%y2l(:) = zzero + do i=1, n_row + mlprec_wrk(ilev)%y2l(i) = mlprec_wrk(ilev)%y2l(i) + & + & mlprec_wrk(ilev+1)%y2l(baseprecv(ilev+1)%mlia(i)) + enddo + + end if + + call psb_spmm(-zone,baseprecv(ilev)%base_a,mlprec_wrk(ilev)%y2l,& + & zone,mlprec_wrk(ilev)%tx,baseprecv(ilev)%base_desc,info,work=work) + + if(info /=0) goto 9999 + + call psb_baseprc_aply(zone,baseprecv(ilev),mlprec_wrk(ilev)%tx,& + & zone,mlprec_wrk(ilev)%y2l,baseprecv(ilev)%base_desc, trans, work,info) + + if(info /=0) goto 9999 + + enddo + + call psb_geaxpby(alpha,mlprec_wrk(1)%y2l,beta,y,baseprecv(1)%base_desc,info) + + if(info /=0) goto 9999 + + + case(pre_smooth_) + + ! + ! Pre smoothing. + ! 1. X(1) = Xext + ! 2. Y(1) = (K(1)**(-1))*X(1) + ! 3. TX(1) = X(1) - A(1)*Y(1) + ! 4. DO ILEV=2, NLEV + ! X(ILEV) = AV(PR_SM_T_,ILEV)*TX(ILEV-1) + ! Y(ILEV) = (K(ILEV)**(-1))*X(ILEV) + ! TX(ILEV) = (X(ILEV)-A(ILEV)*Y(ILEV)) + ! 5. DO ILEV=NLEV-1,1,-1 + ! Y(ILEV) = Y(ILEV) + AV(PR_SM_,ILEV+1)*Y(ILEV+1) + ! 6. Yext = beta*Yext + alpha*Y(1) + ! + ! Note: level numbering reversed wrt ref. DD, i.e. + ! 1..NLEV <=> (j) <-> 0 + ! + ! + + n_col = desc_data%matrix_data(psb_n_col_) + nr2l = baseprecv(1)%desc_data%matrix_data(psb_n_col_) + + allocate(mlprec_wrk(1)%x2l(nr2l),mlprec_wrk(1)%y2l(nr2l), & + & mlprec_wrk(1)%tx(nr2l), stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + mlprec_wrk(1)%y2l(:) = zzero + mlprec_wrk(1)%x2l(:) = x + + call psb_baseprc_aply(zone,baseprecv(1),mlprec_wrk(1)%x2l,& + & zzero,mlprec_wrk(1)%y2l,& + & baseprecv(1)%base_desc,& + & trans,work,info) + + if(info /=0) goto 9999 + + mlprec_wrk(1)%tx = mlprec_wrk(1)%x2l + + call psb_spmm(-zone,baseprecv(1)%base_a,mlprec_wrk(1)%y2l,& + & zone,mlprec_wrk(1)%tx,baseprecv(1)%base_desc,info,work=work) + if(info /=0) goto 9999 + + do ilev = 2, nlev + n_row = baseprecv(ilev-1)%base_desc%matrix_data(psb_n_row_) + n_col = baseprecv(ilev-1)%desc_data%matrix_data(psb_n_col_) + nr2l = baseprecv(ilev)%desc_data%matrix_data(psb_n_col_) + nrg = baseprecv(ilev)%desc_data%matrix_data(psb_n_row_) + ismth = baseprecv(ilev)%iprcparm(smth_kind_) + allocate(mlprec_wrk(ilev)%tx(nr2l),mlprec_wrk(ilev)%y2l(nr2l),& + & mlprec_wrk(ilev)%x2l(nr2l), stat=info) + + + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + mlprec_wrk(ilev)%x2l(:) = zzero + mlprec_wrk(ilev)%y2l(:) = zzero + mlprec_wrk(ilev)%tx(:) = zzero + + + if (ismth /= no_smth_) then + ! + !Smoothed Aggregation + ! + if (baseprecv(ilev)%iprcparm(glb_smth_) >0) then + + call psb_halo(mlprec_wrk(ilev-1)%tx,baseprecv(ilev-1)%base_desc,& + & info,work=work) + if(info /=0) goto 9999 + else + mlprec_wrk(ilev-1)%tx(n_row+1:max(n_row,n_col)) = zzero + end if + + call psb_csmm(zone,baseprecv(ilev)%av(sm_pr_t_),mlprec_wrk(ilev-1)%tx,zzero,& + & mlprec_wrk(ilev)%x2l,info) + if(info /=0) goto 9999 + + else + ! + ! Raw aggregation, may take shortcuts + ! + mlprec_wrk(ilev)%x2l = zzero + do i=1,n_row + mlprec_wrk(ilev)%x2l(baseprecv(ilev)%mlia(i)) = & + & mlprec_wrk(ilev)%x2l(baseprecv(ilev)%mlia(i)) + & + & mlprec_wrk(ilev-1)%tx(i) + end do + end if + + if (baseprecv(ilev)%iprcparm(coarse_mat_)==mat_repl_) then + call psb_sum(ictxt,mlprec_wrk(ilev)%x2l(1:nrg)) + else if (baseprecv(ilev)%iprcparm(coarse_mat_) /= mat_distr_) then + write(0,*) 'Unknown value for baseprecv(2)%iprcparm(coarse_mat_) ',& + & baseprecv(ilev)%iprcparm(coarse_mat_) + endif + + + call psb_baseprc_aply(zone,baseprecv(ilev),mlprec_wrk(ilev)%x2l,& + & zzero,mlprec_wrk(ilev)%y2l,baseprecv(ilev)%desc_data, 'N',work,info) + + if(info /=0) goto 9999 + + if(ilev < nlev) then + mlprec_wrk(ilev)%tx = mlprec_wrk(ilev)%x2l + call psb_spmm(-zone,baseprecv(ilev)%base_a,mlprec_wrk(ilev)%y2l,& + & zone,mlprec_wrk(ilev)%tx,baseprecv(ilev)%base_desc,info,work=work) + if(info /=0) goto 9999 + endif + + enddo + + do ilev = nlev-1, 1, -1 + + ismth=baseprecv(ilev+1)%iprcparm(smth_kind_) + + if (ismth /= no_smth_) then + + if (ismth == smth_omg_) & + & call psb_halo(mlprec_wrk(ilev+1)%y2l,& + & baseprecv(ilev+1)%desc_data,info,work=work) + call psb_csmm(zone,baseprecv(ilev+1)%av(sm_pr_),mlprec_wrk(ilev+1)%y2l,& + & zone,mlprec_wrk(ilev)%y2l,info) + + if(info /=0) goto 9999 + + else + + n_row = baseprecv(ilev+1)%base_desc%matrix_data(psb_n_row_) + do i=1, n_row + mlprec_wrk(ilev)%y2l(i) = mlprec_wrk(ilev)%y2l(i) + & + & mlprec_wrk(ilev+1)%y2l(baseprecv(ilev+1)%mlia(i)) + enddo + + end if + + enddo + + call psb_geaxpby(alpha,mlprec_wrk(1)%y2l,beta,y,& + & baseprecv(1)%base_desc,info) + + if(info /=0) goto 9999 + + + + case(smooth_both_) + + ! + ! Symmetrized smoothing. + ! 1. X(1) = Xext + ! 2. Y(1) = (K(1)**(-1))*X(1) + ! 3. TX(1) = X(1) - A(1)*Y(1) + ! 4. DO ILEV=2, NLEV + ! X(ILEV) = AV(PR_SM_T_,ILEV)*TX(ILEV-1) + ! Y(ILEV) = (K(ILEV)**(-1))*X(ILEV) + ! TX(ILEV) = (X(ILEV)-A(ILEV)*Y(ILEV)) + ! 5. DO ILEV=NLEV-1,1,-1 + ! Y(ILEV) = Y(ILEV) + AV(PR_SM_,ILEV+1)*Y(ILEV+1) + ! Y(ILEV) = Y(ILEV) + (K(ILEV)**(-1))*(X(ILEV)-A(ILEV)*Y(ILEV)) + ! 6. Yext = beta*Yext + alpha*Y(1) + ! + ! Note: level numbering reversed wrt ref. DD, i.e. + ! 1..NLEV <=> (j) <-> 0 + ! + ! + n_col = desc_data%matrix_data(psb_n_col_) + nr2l = baseprecv(1)%desc_data%matrix_data(psb_n_col_) + + allocate(mlprec_wrk(1)%x2l(nr2l),mlprec_wrk(1)%y2l(nr2l), & + & mlprec_wrk(1)%ty(nr2l), mlprec_wrk(1)%tx(nr2l), stat=info) + + mlprec_wrk(1)%x2l(:) = zzero + mlprec_wrk(1)%y2l(:) = zzero + mlprec_wrk(1)%tx(:) = zzero + mlprec_wrk(1)%ty(:) = zzero + + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + call psb_geaxpby(zone,x,zzero,mlprec_wrk(1)%x2l,& + & baseprecv(1)%base_desc,info) + call psb_geaxpby(zone,x,zzero,mlprec_wrk(1)%tx,& + & baseprecv(1)%base_desc,info) + + call psb_baseprc_aply(zone,baseprecv(1),mlprec_wrk(1)%x2l,& + & zzero,mlprec_wrk(1)%y2l,& + & baseprecv(1)%base_desc,& + & trans,work,info) + + if(info /=0) goto 9999 + + mlprec_wrk(1)%ty = mlprec_wrk(1)%x2l + + call psb_spmm(-zone,baseprecv(1)%base_a,mlprec_wrk(1)%y2l,& + & zone,mlprec_wrk(1)%ty,baseprecv(1)%base_desc,info,work=work) + if(info /=0) goto 9999 + + do ilev = 2, nlev + n_row = baseprecv(ilev-1)%base_desc%matrix_data(psb_n_row_) + n_col = baseprecv(ilev-1)%desc_data%matrix_data(psb_n_col_) + nr2l = baseprecv(ilev)%desc_data%matrix_data(psb_n_col_) + nrg = baseprecv(ilev)%desc_data%matrix_data(psb_n_row_) + ismth=baseprecv(ilev)%iprcparm(smth_kind_) + allocate(mlprec_wrk(ilev)%ty(nr2l),mlprec_wrk(ilev)%y2l(nr2l),& + & mlprec_wrk(ilev)%x2l(nr2l), stat=info) + + mlprec_wrk(ilev)%x2l(:) = zzero + mlprec_wrk(ilev)%y2l(:) = zzero + mlprec_wrk(ilev)%tx(:) = zzero + mlprec_wrk(ilev)%ty(:) = zzero + + + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + + if (ismth /= no_smth_) then + ! + !Smoothed Aggregation + ! + if (baseprecv(ilev)%iprcparm(glb_smth_) >0) then + + call psb_halo(mlprec_wrk(ilev-1)%ty,baseprecv(ilev-1)%base_desc,& + & info,work=work) + if(info /=0) goto 9999 + else + mlprec_wrk(ilev-1)%ty(n_row+1:max(n_row,n_col)) = zzero + end if + + call psb_csmm(zone,baseprecv(ilev)%av(sm_pr_t_),mlprec_wrk(ilev-1)%ty,zzero,& + & mlprec_wrk(ilev)%x2l,info) + if(info /=0) goto 9999 + + else + ! + ! Raw aggregation, may take shortcuts + ! + mlprec_wrk(ilev)%x2l = zzero + do i=1,n_row + mlprec_wrk(ilev)%x2l(baseprecv(ilev)%mlia(i)) = & + & mlprec_wrk(ilev)%x2l(baseprecv(ilev)%mlia(i)) + & + & mlprec_wrk(ilev-1)%ty(i) + end do + end if + + if (baseprecv(ilev)%iprcparm(coarse_mat_)==mat_repl_) then + call psb_sum(ictxt,mlprec_wrk(ilev)%x2l(1:nrg)) + else if (baseprecv(ilev)%iprcparm(coarse_mat_) /= mat_distr_) then + write(0,*) 'Unknown value for baseprecv(2)%iprcparm(coarse_mat_) ',& + & baseprecv(ilev)%iprcparm(coarse_mat_) + endif + + call psb_geaxpby(zone,mlprec_wrk(ilev)%x2l,zzero,mlprec_wrk(ilev)%tx,& + & baseprecv(ilev)%base_desc,info) + if(info /=0) goto 9999 + + call psb_baseprc_aply(zone,baseprecv(ilev),mlprec_wrk(ilev)%x2l,& + & zzero,mlprec_wrk(ilev)%y2l,baseprecv(ilev)%desc_data, 'N',work,info) + + if(info /=0) goto 9999 + + if(ilev < nlev) then + mlprec_wrk(ilev)%ty = mlprec_wrk(ilev)%x2l + call psb_spmm(-zone,baseprecv(ilev)%base_a,mlprec_wrk(ilev)%y2l,& + & zone,mlprec_wrk(ilev)%ty,baseprecv(ilev)%base_desc,info,work=work) + if(info /=0) goto 9999 + endif + + enddo + + + do ilev=nlev-1, 1, -1 + + ismth=baseprecv(ilev+1)%iprcparm(smth_kind_) + if (ismth /= no_smth_) then + if (ismth == smth_omg_) & + & call psb_halo(mlprec_wrk(ilev+1)%y2l,baseprecv(ilev+1)%desc_data,& + & info,work=work) + call psb_csmm(zone,baseprecv(ilev+1)%av(sm_pr_),mlprec_wrk(ilev+1)%y2l,& + & zone,mlprec_wrk(ilev)%y2l,info) + if(info /=0) goto 9999 + + else + n_row = baseprecv(ilev)%base_desc%matrix_data(psb_n_row_) + do i=1, n_row + mlprec_wrk(ilev)%y2l(i) = mlprec_wrk(ilev)%y2l(i) + & + & mlprec_wrk(ilev+1)%y2l(baseprecv(ilev+1)%mlia(i)) + enddo + + end if + + call psb_spmm(-zone,baseprecv(ilev)%base_a,mlprec_wrk(ilev)%y2l,& + & zone,mlprec_wrk(ilev)%tx,baseprecv(ilev)%base_desc,info,work=work) + + if(info /=0) goto 9999 + + call psb_baseprc_aply(zone,baseprecv(ilev),mlprec_wrk(ilev)%tx,& + & zone,mlprec_wrk(ilev)%y2l,baseprecv(ilev)%base_desc, trans, work,info) + + if(info /=0) goto 9999 + + enddo + + call psb_geaxpby(alpha,mlprec_wrk(1)%y2l,beta,y,& + & baseprecv(1)%base_desc,info) + + if(info /=0) goto 9999 + + case default + + call psb_errpush(4013,name,a_err='wrong smooth_pos',& + & i_Err=(/baseprecv(2)%iprcparm(smth_pos_),0,0,0,0/)) + goto 9999 + + end select + + case default + call psb_errpush(4013,name,a_err='wrong mltype',& + & i_Err=(/baseprecv(2)%iprcparm(ml_type_),0,0,0,0/)) + goto 9999 + + end select + + deallocate(mlprec_wrk) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_errpush(info,name) + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +!!$contains +!!$ subroutine mlprec_wrk_free(wrk) +!!$ type(psb_mlprec_wrk_type) :: wrk(:) +!!$ ! This will not be needed when we have allocatables, as +!!$ ! it is sufficient to deallocate the container, and +!!$ ! the compiler is supposed to recursively deallocate the +!!$ ! various components. +!!$ integer i +!!$ +!!$ do i=1, size(wrk) +!!$ if (associated(wrk(i)%tx)) deallocate(wrk(i)%tx) +!!$ if (associated(wrk(i)%ty)) deallocate(wrk(i)%ty) +!!$ if (associated(wrk(i)%x2l)) deallocate(wrk(i)%x2l) +!!$ if (associated(wrk(i)%y2l)) deallocate(wrk(i)%y2l) +!!$ if (associated(wrk(i)%b2l)) deallocate(wrk(i)%b2l) +!!$ if (associated(wrk(i)%tty)) deallocate(wrk(i)%tty) +!!$ end do +!!$ end subroutine mlprec_wrk_free + +end subroutine psb_zmlprc_aply + diff --git a/psb_zmlprc_bld.f90 b/psb_zmlprc_bld.f90 new file mode 100644 index 00000000..6fb737c7 --- /dev/null +++ b/psb_zmlprc_bld.f90 @@ -0,0 +1,198 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_zmlprc_bld(a,desc_a,p,info) + + use psb_base_mod + use psb_prec_type + implicit none + + type(psb_zspmat_type), intent(in), target :: a + type(psb_desc_type), intent(in), target :: desc_a + type(psb_zbaseprc_type), intent(inout),target :: p + integer, intent(out) :: info + + type(psb_desc_type) :: desc_ac + + integer :: i, nrg, nzg, err_act,k + character(len=20) :: name, ch_err + logical, parameter :: debug=.false. + type(psb_zspmat_type) :: ac + + interface psb_baseprc_bld + subroutine psb_zbaseprc_bld(a,desc_a,p,info,upd) + use psb_base_mod + use psb_prec_type + type(psb_zspmat_type), target :: a + type(psb_desc_type), intent(in) :: desc_a + type(psb_zbaseprc_type),intent(inout) :: p + integer, intent(out) :: info + character, intent(in), optional :: upd + end subroutine psb_zbaseprc_bld + end interface + + interface psb_genaggrmap + subroutine psb_zgenaggrmap(aggr_type,a,desc_a,nlaggr,ilaggr,info) + use psb_base_mod + use psb_prec_type + implicit none + integer, intent(in) :: aggr_type + type(psb_zspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + integer, allocatable :: ilaggr(:),nlaggr(:) + integer, intent(out) :: info + end subroutine psb_zgenaggrmap + end interface + + interface psb_bldaggrmat + subroutine psb_zbldaggrmat(a,desc_a,ac,desc_ac,p,info) + use psb_base_mod + use psb_prec_type + type(psb_zspmat_type), intent(in), target :: a + type(psb_zbaseprc_type), intent(inout),target :: p + type(psb_zspmat_type), intent(out),target :: ac + type(psb_desc_type), intent(in) :: desc_a + type(psb_desc_type), intent(inout) :: desc_ac + integer, intent(out) :: info + end subroutine psb_zbldaggrmat + end interface + + integer :: ictxt, np, me + + name='psb_mlprec_bld' + if (psb_get_errstatus().ne.0) return + info = 0 + ictxt = psb_cd_get_context(desc_a) + call psb_info(ictxt,me,np) + call psb_erractionsave(err_act) + call psb_nullify_sp(ac) + + + if (.not.allocated(p%iprcparm)) then + info = 2222 + call psb_errpush(info,name) + goto 9999 + endif + call psb_check_def(p%iprcparm(ml_type_),'Multilevel type',& + & mult_ml_prec_,is_legal_ml_type) + call psb_check_def(p%iprcparm(aggr_alg_),'aggregation',& + & loc_aggr_,is_legal_ml_aggr_kind) + call psb_check_def(p%iprcparm(smth_kind_),'Smoother kind',& + & smth_omg_,is_legal_ml_smth_kind) + call psb_check_def(p%iprcparm(coarse_mat_),'Coarse matrix',& + & mat_distr_,is_legal_ml_coarse_mat) + call psb_check_def(p%iprcparm(smth_pos_),'smooth_pos',& + & pre_smooth_,is_legal_ml_smooth_pos) + + +!!$ nullify(p%desc_data) + select case(p%iprcparm(f_type_)) + case(f_ilu_n_) + call psb_check_def(p%iprcparm(ilu_fill_in_),'Level',0,is_legal_ml_lev) + case(f_ilu_e_) + call psb_check_def(p%dprcparm(fact_eps_),'Eps',dzero,is_legal_ml_eps) + end select + call psb_check_def(p%dprcparm(smooth_omega_),'omega',dzero,is_legal_omega) + call psb_check_def(p%iprcparm(jac_sweeps_),'Jacobi sweeps',& + & 1,is_legal_jac_sweeps) + + + ! Currently this is ignored by gen_aggrmap, but it could be + ! changed in the future. Need to package nlaggr & mlia in a + ! private data structure? + call psb_genaggrmap(p%iprcparm(aggr_alg_),a,desc_a,p%nlaggr,p%mlia,info) + if(info /= 0) then + info=4010 + ch_err='psb_gen_aggrmap' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (debug) write(0,*) 'Out from genaggrmap',p%nlaggr + + call psb_nullify_desc(desc_ac) + call psb_bldaggrmat(a,desc_a,ac,desc_ac,p,info) + if(info /= 0) then + info=4010 + ch_err='psb_bld_aggrmat' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (debug) write(0,*) 'Out from bldaggrmat',desc_ac%matrix_data(:) + + + + call psb_baseprc_bld(ac,desc_ac,p,info) + if (debug) write(0,*) 'Out from baseprcbld',info + if(info /= 0) then + info=4010 + ch_err='psb_baseprc_bld' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + + ! + ! We have used a separate ac because: + ! 1. We want to reuse the same routines psb_ilu_bld etc. + ! 2. We do NOT want to pass an argument twice to them + ! p%av(ac_) and p, as this would violate the Fortran standard + ! Hence a separate AC and a TRANSFER function at the end. + ! + call psb_sp_transfer(ac,p%av(ac_),info) + p%base_a => p%av(ac_) + call psb_cdtransfer(desc_ac,p%desc_ac,info) + + if (info /= 0) then + info=4010 + ch_err='psb_cdtransfer' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + p%base_desc => p%desc_ac + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + Return + +end subroutine psb_zmlprc_bld diff --git a/psb_zprc_aply.f90 b/psb_zprc_aply.f90 new file mode 100644 index 00000000..ca48ce96 --- /dev/null +++ b/psb_zprc_aply.f90 @@ -0,0 +1,247 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_zprc_aply(prec,x,y,desc_data,info,trans, work) + + use psb_base_mod + use psb_prec_type + implicit none + + type(psb_desc_type),intent(in) :: desc_data + type(psb_zprec_type), intent(in) :: prec + complex(kind(0.d0)),intent(inout) :: x(:), y(:) + integer, intent(out) :: info + character(len=1), optional :: trans + complex(kind(0.d0)), optional, target :: work(:) + + ! Local variables + character :: trans_ + complex(kind(1.d0)), pointer :: work_(:) + integer :: ictxt,np,me,err_act + logical,parameter :: debug=.false., debugprt=.false. + external mpi_wtime + character(len=20) :: name + + interface psb_baseprc_aply + subroutine psb_zbaseprc_aply(alpha,prec,x,beta,y,desc_data,trans,work,info) + use psb_base_mod + use psb_prec_type + type(psb_desc_type),intent(in) :: desc_data + type(psb_zbaseprc_type), intent(in) :: prec + complex(kind(0.d0)),intent(inout) :: x(:), y(:) + complex(kind(0.d0)),intent(in) :: alpha,beta + character(len=1) :: trans + complex(kind(0.d0)),target :: work(:) + integer, intent(out) :: info + end subroutine psb_zbaseprc_aply + end interface + + interface psb_mlprc_aply + subroutine psb_zmlprc_aply(alpha,baseprecv,x,beta,y,desc_data,trans,work,info) + use psb_base_mod + use psb_prec_type + type(psb_desc_type),intent(in) :: desc_data + type(psb_zbaseprc_type), intent(in) :: baseprecv(:) + complex(kind(0.d0)),intent(in) :: alpha,beta + complex(kind(0.d0)),intent(inout) :: x(:), y(:) + character :: trans + complex(kind(0.d0)),target :: work(:) + integer, intent(out) :: info + end subroutine psb_zmlprc_aply + end interface + + name='psb_zprc_aply' + info = 0 + call psb_erractionsave(err_act) + + ictxt=desc_data%matrix_data(psb_ctxt_) + call psb_info(ictxt, me, np) + + if (present(trans)) then + trans_=trans + else + trans_='N' + end if + + if (present(work)) then + work_ => work + else + allocate(work_(4*desc_data%matrix_data(psb_n_col_)),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + end if + + if (.not.(allocated(prec%baseprecv))) then + write(0,*) 'Inconsistent preconditioner: neither SMTH nor BASE?' + end if + if (size(prec%baseprecv) >1) then + if (debug) write(0,*) 'Into mlprc_aply',size(x),size(y) + call psb_mlprc_aply(zone,prec%baseprecv,x,zzero,y,desc_data,trans_,work_,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_zmlprc_aply') + goto 9999 + end if + + else if (size(prec%baseprecv) == 1) then + call psb_baseprc_aply(zone,prec%baseprecv(1),x,zzero,y,desc_data,trans_, work_,info) + else + write(0,*) 'Inconsistent preconditioner: size of baseprecv???' + endif + + if (present(work)) then + else + deallocate(work_) + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_zprc_aply + + +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_zprc_aply1(prec,x,desc_data,info,trans) + use psb_base_mod + use psb_prec_type + implicit none + + type(psb_desc_type),intent(in) :: desc_data + type(psb_zprec_type), intent(in) :: prec + complex(kind(0.d0)),intent(inout) :: x(:) + integer, intent(out) :: info + character(len=1), optional :: trans + logical,parameter :: debug=.false., debugprt=.false. + + interface + subroutine psb_zprc_aply(prec,x,y,desc_data,info,trans, work) + use psb_base_mod + use psb_prec_type + type(psb_desc_type),intent(in) :: desc_data + type(psb_zprec_type), intent(in) :: prec + complex(kind(0.d0)),intent(inout) :: x(:), y(:) + integer, intent(out) :: info + character(len=1), optional :: trans + complex(kind(0.d0)), optional, target :: work(:) + end subroutine psb_zprc_aply + end interface + + ! Local variables + character :: trans_ + integer :: ictxt,np,me,i, isz, err_act, int_err(5) + complex(kind(1.d0)), pointer :: WW(:), w1(:) + character(len=20) :: name, ch_err + name='psb_zprec1' + info = 0 + call psb_erractionsave(err_act) + + + ictxt=desc_data%matrix_data(psb_ctxt_) + call psb_info(ictxt, me, np) + if (present(trans)) then + trans_=trans + else + trans_='N' + end if + + allocate(ww(size(x)),w1(size(x)),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + if (debug) write(0,*) 'Prc_aply1 Size(x) ',size(x), size(ww),size(w1) + call psb_zprc_aply(prec,x,ww,desc_data,info,trans_,work=w1) + if(info /=0) goto 9999 + x(:) = ww(:) + deallocate(ww,W1) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_errpush(info,name) + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return +end subroutine psb_zprc_aply1 diff --git a/psb_zprecbld.f90 b/psb_zprecbld.f90 new file mode 100644 index 00000000..2a51df83 --- /dev/null +++ b/psb_zprecbld.f90 @@ -0,0 +1,169 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_zprecbld(a,desc_a,p,info,upd) + + use psb_base_mod + use psb_prec_type + use psb_prec_mod + Implicit None + + type(psb_zspmat_type), target :: a + type(psb_desc_type), intent(in), target :: desc_a + type(psb_zprec_type),intent(inout) :: p + integer, intent(out) :: info + character, intent(in), optional :: upd + + + ! Local scalars + Integer :: err,i,j,k,ictxt, me,np,lw, err_act + integer :: int_err(5) + character :: iupd + + logical, parameter :: debug=.false. + integer,parameter :: iroot=0,iout=60,ilout=40 + character(len=20) :: name, ch_err + + if(psb_get_errstatus().ne.0) return + info=0 + err=0 + call psb_erractionsave(err_act) + name = 'psb_precbld' + + if (debug) write(0,*) 'Entering precbld',P%prec,desc_a%matrix_data(:) + info = 0 + int_err(1) = 0 + ictxt = psb_cd_get_context(desc_a) + + if (debug) write(0,*) 'Preconditioner psb_info' + call psb_info(ictxt, me, np) + + if (present(upd)) then + if (debug) write(0,*) 'UPD ', upd + if ((upd.eq.'F').or.(upd.eq.'T')) then + iupd=upd + else + iupd='F' + endif + else + iupd='F' + endif + + if (.not.allocated(p%baseprecv)) then + !! Error 1: should call precset + info=4010 + ch_err='unallocated bpv' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + ! + ! Should add check to ensure all procs have the same... + ! + ! ALso should define symbolic names for the preconditioners. + ! + if (size(p%baseprecv) >= 1) then + call init_baseprc_av(p%baseprecv(1),info) + if (info /= 0) then + info=4010 + ch_err='allocate' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + endif + + call psb_baseprc_bld(a,desc_a,p%baseprecv(1),info,iupd) + + else + info=4010 + ch_err='size bpv' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + + endif + + if (size(p%baseprecv) > 1) then + + do i=2, size(p%baseprecv) + call init_baseprc_av(p%baseprecv(i),info) + if (info /= 0) then + info=4010 + ch_err='allocate' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + endif + + call psb_mlprc_bld(p%baseprecv(i-1)%base_a,p%baseprecv(i-1)%base_desc,& + & p%baseprecv(i),info) + if (info /= 0) then + info=4010 + call psb_errpush(info,name) + goto 9999 + endif + if (debug) then + write(0,*) 'Return from ',i-1,' call to mlprcbld ',info + endif + + end do + + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +contains + + subroutine init_baseprc_av(p,info) + type(psb_zbaseprc_type), intent(inout) :: p + integer :: info + if (allocated(p%av)) then + ! Have not decided what to do yet + end if + allocate(p%av(max_avsz),stat=info) +!!$ if (info /= 0) return + do k=1,size(p%av) + call psb_nullify_sp(p%av(k)) + end do + end subroutine init_baseprc_av + +end subroutine psb_zprecbld + diff --git a/psb_zprecfree.f90 b/psb_zprecfree.f90 new file mode 100644 index 00000000..7db0b54d --- /dev/null +++ b/psb_zprecfree.f90 @@ -0,0 +1,74 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_zprecfree(p,info) + use psb_base_mod + use psb_prec_type + implicit none + !....parameters... + + type(psb_zprec_type), intent(inout) :: p + integer, intent(out) :: info + + !...locals.... + integer :: ictxt,me, np,err_act,i + character(len=20) :: name + + if(psb_get_errstatus().ne.0) return + info=0 + name = 'pszprecfree' + call psb_erractionsave(err_act) + + me=-1 + + if (allocated(p%baseprecv)) then + do i=1,size(p%baseprecv) + call psb_base_precfree(p%baseprecv(i),info) + end do + deallocate(p%baseprecv) + end if + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_zprecfree diff --git a/psb_zprecset.f90 b/psb_zprecset.f90 new file mode 100644 index 00000000..5c79e1d9 --- /dev/null +++ b/psb_zprecset.f90 @@ -0,0 +1,187 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_zprecset(p,ptype,info,iv,rs,rv,ilev,nlev) + + use psb_base_mod + use psb_prec_type + implicit none + + type(psb_zprec_type), intent(inout) :: p + character(len=*), intent(in) :: ptype + integer, intent(out) :: info + integer, optional, intent(in) :: iv(:) + integer, optional, intent(in) :: nlev,ilev + real(kind(1.d0)), optional, intent(in) :: rs + real(kind(1.d0)), optional, intent(in) :: rv(:) + + character(len=len(ptype)) :: typeup + integer :: isz, err, nlev_, ilev_, i + + info = 0 + + if (present(ilev)) then + ilev_ = max(1, ilev) + else + ilev_ = 1 + end if + if (present(nlev)) then + if (allocated(p%baseprecv)) then + write(0,*) 'Warning: NLEV is ignored when P is already allocated' + end if + nlev_ = max(1, nlev) + else + nlev_ = 1 + end if + + if (.not.allocated(p%baseprecv)) then + allocate(p%baseprecv(nlev_),stat=err) + else + nlev_ = size(p%baseprecv) + endif + + if ((ilev_<1).or.(ilev_ > nlev_)) then + write(0,*) 'PRECSET ERRROR: ilev out of bounds' + info = -1 + return + endif + + call psb_realloc(ifpsz,p%baseprecv(ilev_)%iprcparm,info) + if (info == 0) call psb_realloc(dfpsz,p%baseprecv(ilev_)%dprcparm,info) + if (info /= 0) return + p%baseprecv(ilev_)%iprcparm(:) = 0 + + select case(toupper(ptype(1:len_trim(ptype)))) + case ('NONE','NOPREC') + p%baseprecv(ilev_)%iprcparm(:) = 0 + p%baseprecv(ilev_)%iprcparm(p_type_) = noprec_ + p%baseprecv(ilev_)%iprcparm(f_type_) = f_none_ + p%baseprecv(ilev_)%iprcparm(restr_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(prol_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(iren_) = 0 + p%baseprecv(ilev_)%iprcparm(n_ovr_) = 0 + p%baseprecv(ilev_)%iprcparm(jac_sweeps_) = 1 + + case ('DIAG','DIAGSC') + p%baseprecv(ilev_)%iprcparm(:) = 0 + p%baseprecv(ilev_)%iprcparm(p_type_) = diagsc_ + p%baseprecv(ilev_)%iprcparm(f_type_) = f_none_ + p%baseprecv(ilev_)%iprcparm(restr_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(prol_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(iren_) = 0 + p%baseprecv(ilev_)%iprcparm(n_ovr_) = 0 + p%baseprecv(ilev_)%iprcparm(jac_sweeps_) = 1 + + case ('BJA','ILU') + p%baseprecv(ilev_)%iprcparm(:) = 0 + p%baseprecv(ilev_)%iprcparm(p_type_) = bja_ + p%baseprecv(ilev_)%iprcparm(f_type_) = f_ilu_n_ + p%baseprecv(ilev_)%iprcparm(restr_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(prol_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(iren_) = 0 + p%baseprecv(ilev_)%iprcparm(n_ovr_) = 0 + p%baseprecv(ilev_)%iprcparm(ilu_fill_in_) = 0 + p%baseprecv(ilev_)%iprcparm(jac_sweeps_) = 1 + + case ('ASM','AS') + p%baseprecv(ilev_)%iprcparm(:) = 0 + ! Defaults first + p%baseprecv(ilev_)%iprcparm(p_type_) = asm_ + p%baseprecv(ilev_)%iprcparm(f_type_) = f_ilu_n_ + p%baseprecv(ilev_)%iprcparm(restr_) = psb_halo_ + p%baseprecv(ilev_)%iprcparm(prol_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(iren_) = 0 + p%baseprecv(ilev_)%iprcparm(n_ovr_) = 1 + p%baseprecv(ilev_)%iprcparm(ilu_fill_in_) = 0 + p%baseprecv(ilev_)%iprcparm(jac_sweeps_) = 1 + if (present(iv)) then + isz = size(iv) + if (isz >= 1) p%baseprecv(ilev_)%iprcparm(n_ovr_) = iv(1) + if (isz >= 2) p%baseprecv(ilev_)%iprcparm(restr_) = iv(2) + if (isz >= 3) p%baseprecv(ilev_)%iprcparm(prol_) = iv(3) + if (isz >= 4) p%baseprecv(ilev_)%iprcparm(f_type_) = iv(4) + ! Do not consider renum for the time being. +!!$ if (isz >= 5) p%baseprecv(ilev_)%iprcparm(iren_) = iv(5) + end if + + + case ('ML', '2L', '2LEV') + + + p%baseprecv(ilev_)%iprcparm(:) = 0 + p%baseprecv(ilev_)%iprcparm(p_type_) = bja_ + p%baseprecv(ilev_)%iprcparm(restr_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(prol_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(iren_) = 0 + p%baseprecv(ilev_)%iprcparm(n_ovr_) = 0 + p%baseprecv(ilev_)%iprcparm(ml_type_) = mult_ml_prec_ + p%baseprecv(ilev_)%iprcparm(aggr_alg_) = loc_aggr_ + p%baseprecv(ilev_)%iprcparm(smth_kind_) = smth_omg_ + p%baseprecv(ilev_)%iprcparm(coarse_mat_) = mat_distr_ + p%baseprecv(ilev_)%iprcparm(smth_pos_) = post_smooth_ + p%baseprecv(ilev_)%iprcparm(glb_smth_) = 1 + p%baseprecv(ilev_)%iprcparm(om_choice_) = lib_choice_ + p%baseprecv(ilev_)%iprcparm(f_type_) = f_ilu_n_ + p%baseprecv(ilev_)%iprcparm(ilu_fill_in_) = 0 + p%baseprecv(ilev_)%dprcparm(smooth_omega_) = 4.d0/3.d0 + p%baseprecv(ilev_)%iprcparm(jac_sweeps_) = 1 + + if (present(iv)) then + isz = size(iv) + if (isz >= 1) p%baseprecv(ilev_)%iprcparm(ml_type_) = iv(1) + if (isz >= 2) p%baseprecv(ilev_)%iprcparm(aggr_alg_) = iv(2) + if (isz >= 3) p%baseprecv(ilev_)%iprcparm(coarse_mat_) = iv(3) + if (isz >= 4) p%baseprecv(ilev_)%iprcparm(smth_pos_) = iv(4) + if (isz >= 5) p%baseprecv(ilev_)%iprcparm(f_type_) = iv(5) + if (isz >= 6) p%baseprecv(ilev_)%iprcparm(jac_sweeps_) = iv(6) + if (isz >= 7) p%baseprecv(ilev_)%iprcparm(smth_kind_) = iv(7) + end if + + if (present(rs)) then + p%baseprecv(ilev_)%iprcparm(om_choice_) = user_choice_ + p%baseprecv(ilev_)%dprcparm(smooth_omega_) = rs + end if + + + case default + write(0,*) 'Unknown preconditioner type request "',ptype,'"' + err = 2 + + end select + + info = err + +end subroutine psb_zprecset diff --git a/psb_zslu_bld.f90 b/psb_zslu_bld.f90 new file mode 100644 index 00000000..1b51d6d1 --- /dev/null +++ b/psb_zslu_bld.f90 @@ -0,0 +1,204 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_zslu_bld(a,desc_a,p,info) + use psb_base_mod + use psb_prec_type + implicit none + + type(psb_zspmat_type), intent(inout) :: a + type(psb_desc_type), intent(in) :: desc_a + type(psb_zbaseprc_type), intent(inout) :: p + integer, intent(out) :: info + + + type(psb_zspmat_type) :: blck, atmp + character(len=5) :: fmt + character :: upd='F' + integer :: i,j,nza,nzb,nzt,ictxt, me,np,err_act + logical, parameter :: debug=.false. + character(len=20) :: name, ch_err + + interface psb_asmatbld + Subroutine psb_zasmatbld(ptype,novr,a,blk,desc_data,upd,desc_p,info,outfmt) + use psb_base_mod + use psb_prec_type + integer, intent(in) :: ptype,novr + Type(psb_zspmat_type), Intent(in) :: a + Type(psb_zspmat_type), Intent(inout) :: blk + Type(psb_desc_type), Intent(inout) :: desc_p + Type(psb_desc_type), Intent(in) :: desc_data + Character, Intent(in) :: upd + integer, intent(out) :: info + character(len=5), optional :: outfmt + end Subroutine psb_zasmatbld + end interface + + if(psb_get_errstatus().ne.0) return + info=0 + name='psb_slu_bld' + call psb_erractionsave(err_act) + + ictxt = desc_A%matrix_data(psb_ctxt_) + + call psb_info(ictxt, me, np) + + fmt = 'COO' + call psb_nullify_sp(blck) + call psb_nullify_sp(atmp) + + atmp%fida='COO' + if (Debug) then + write(0,*) me, 'SPLUBLD: Calling csdp' + call psb_barrier(ictxt) + endif + + call psb_csdp(a,atmp,info) + if(info /= 0) then + info=4010 + ch_err='psb_csdp' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + nza = atmp%infoa(psb_nnz_) + if (Debug) then + write(0,*) me, 'SPLUBLD: Done csdp',info,nza,atmp%m,atmp%k + call psb_barrier(ictxt) + endif + call psb_asmatbld(p%iprcparm(p_type_),p%iprcparm(n_ovr_),a,& + & blck,desc_a,upd,p%desc_data,info,outfmt=fmt) + if(info /= 0) then + info=4010 + ch_err='psb_asmatbld' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + nzb = blck%infoa(psb_nnz_) + if (Debug) then + write(0,*) me, 'SPLUBLD: Done asmatbld',info,nzb,blck%fida + call psb_barrier(ictxt) + endif + if (nzb > 0 ) then + if (size(atmp%aspk) + +#endif + + +#ifdef Add_ +#define psb_zslu_factor_ psb_zslu_factor_ +#define psb_zslu_solve_ psb_zslu_solve_ +#define psb_zslu_free_ psb_zslu_free_ +#endif +#ifdef AddDouble_ +#define psb_zslu_factor_ psb_zslu_factor__ +#define psb_zslu_solve_ psb_zslu_solve__ +#define psb_zslu_free_ psb_zslu_free__ +#endif +#ifdef NoChange +#define psb_zslu_factor_ psb_zslu_factor +#define psb_zslu_solve_ psb_zslu_solve +#define psb_zslu_free_ psb_zslu_free +#endif + + + + +void +psb_zslu_factor_(int *n, int *nnz, +#ifdef Have_SLU_ + doublecomplex *values, int *colind, int *rowptr, + fptr *f_factors, /* a handle containing the address + pointing to the factored matrices */ +#else + void *values, int *colind, int *rowptr, + void *f_factors, +#endif + int *info) + +{ +/* + * This routine can be called from Fortran. + * performs LU decomposition. + * + * f_factors (input/output) fptr* + * On output contains the pointer pointing to + * the structure of the factored matrices. + * + */ + +#ifdef Have_SLU_ + SuperMatrix A, AC, B; + SuperMatrix *L, *U; + int *perm_r; /* row permutations from partial pivoting */ + int *perm_c; /* column permutation vector */ + int *etree; /* column elimination tree */ + SCformat *Lstore; + NCformat *Ustore; + int i, panel_size, permc_spec, relax; + trans_t trans; + double drop_tol = 0.0; + mem_usage_t mem_usage; + superlu_options_t options; + SuperLUStat_t stat; + factors_t *LUfactors; + + trans = NOTRANS; + + + /* Set the default input options. */ + set_default_options(&options); + + /* Initialize the statistics variables. */ + StatInit(&stat); + + /* Adjust to 0-based indexing */ + for (i = 0; i < *nnz; ++i) --colind[i]; + for (i = 0; i <= *n; ++i) --rowptr[i]; + + zCreate_CompRow_Matrix(&A, *n, *n, *nnz, values, colind, rowptr, + SLU_NR, SLU_Z, SLU_GE); + L = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); + U = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); + if ( !(perm_r = intMalloc(*n)) ) ABORT("Malloc fails for perm_r[]."); + if ( !(perm_c = intMalloc(*n)) ) ABORT("Malloc fails for perm_c[]."); + if ( !(etree = intMalloc(*n)) ) ABORT("Malloc fails for etree[]."); + + /* + * Get column permutation vector perm_c[], according to permc_spec: + * permc_spec = 0: natural ordering + * permc_spec = 1: minimum degree on structure of A'*A + * permc_spec = 2: minimum degree on structure of A'+A + * permc_spec = 3: approximate minimum degree for unsymmetric matrices + */ + options.ColPerm=2; + permc_spec = options.ColPerm; + get_perm_c(permc_spec, &A, perm_c); + + sp_preorder(&options, &A, perm_c, etree, &AC); + + panel_size = sp_ienv(1); + relax = sp_ienv(2); + + zgstrf(&options, &AC, drop_tol, relax, panel_size, + etree, NULL, 0, perm_c, perm_r, L, U, &stat, info); + + if ( *info == 0 ) { + Lstore = (SCformat *) L->Store; + Ustore = (NCformat *) U->Store; + zQuerySpace(L, U, &mem_usage); +#if 0 + printf("No of nonzeros in factor L = %d\n", Lstore->nnz); + printf("No of nonzeros in factor U = %d\n", Ustore->nnz); + printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz); + printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", + mem_usage.for_lu/1e6, mem_usage.total_needed/1e6, + mem_usage.expansions); +#endif + } else { + printf("dgstrf() error returns INFO= %d\n", *info); + if ( *info <= *n ) { /* factorization completes */ + zQuerySpace(L, U, &mem_usage); + printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", + mem_usage.for_lu/1e6, mem_usage.total_needed/1e6, + mem_usage.expansions); + } + } + + /* Restore to 1-based indexing */ + for (i = 0; i < *nnz; ++i) ++colind[i]; + for (i = 0; i <= *n; ++i) ++rowptr[i]; + + /* Save the LU factors in the factors handle */ + LUfactors = (factors_t*) SUPERLU_MALLOC(sizeof(factors_t)); + LUfactors->L = L; + LUfactors->U = U; + LUfactors->perm_c = perm_c; + LUfactors->perm_r = perm_r; + *f_factors = (fptr) LUfactors; + + /* Free un-wanted storage */ + SUPERLU_FREE(etree); + Destroy_SuperMatrix_Store(&A); + Destroy_CompCol_Permuted(&AC); + StatFree(&stat); +#else + fprintf(stderr," SLU Not Configured, fix make.inc and recompile\n"); + *info=-1; +#endif +} + + +void +psb_zslu_solve_(int *itrans, int *n, int *nrhs, +#ifdef Have_SLU_ + doublecomplex *b, int *ldb, + fptr *f_factors, /* a handle containing the address + pointing to the factored matrices */ +#else + void *b, int *ldb, + void *f_factors, +#endif + int *info) + +{ +/* + * This routine can be called from Fortran. + * performs triangular solve + * + */ +#ifdef Have_SLU_ + SuperMatrix B; + SuperMatrix *L, *U; + int *perm_r; /* row permutations from partial pivoting */ + int *perm_c; /* column permutation vector */ + int *etree; /* column elimination tree */ + SCformat *Lstore; + NCformat *Ustore; + int i, panel_size, permc_spec, relax; + trans_t trans; + double drop_tol = 0.0; + mem_usage_t mem_usage; + superlu_options_t options; + SuperLUStat_t stat; + factors_t *LUfactors; + + if (*itrans == 0) { + trans = NOTRANS; + } else if (*itrans ==1) { + trans = TRANS; + } else if (*itrans ==2) { + trans = CONJ; + } else { + trans = NOTRANS; + } + /* Initialize the statistics variables. */ + StatInit(&stat); + + /* Extract the LU factors in the factors handle */ + LUfactors = (factors_t*) *f_factors; + L = LUfactors->L; + U = LUfactors->U; + perm_c = LUfactors->perm_c; + perm_r = LUfactors->perm_r; + + zCreate_Dense_Matrix(&B, *n, *nrhs, b, *ldb, SLU_DN, SLU_Z, SLU_GE); + /* Solve the system A*X=B, overwriting B with X. */ + zgstrs (trans, L, U, perm_c, perm_r, &B, &stat, info); + if (info != 0) { + if (B.Stype != SLU_DN) fprintf(stderr,"zgstrs error kind 1: SLU_DN\n"); + if (B.Dtype != SLU_Z) fprintf(stderr,"zgstrs error kind 2: SLU_Z\n"); + if (B.Mtype != SLU_GE) fprintf(stderr,"zgstrs error kind 3: SLU_GE\n"); + } + + Destroy_SuperMatrix_Store(&B); + StatFree(&stat); +#else + fprintf(stderr," SLU Not Configured, fix make.inc and recompile\n"); + *info=-1; +#endif + +} + + +void +psb_zslu_free_( +#ifdef Have_SLU_ + fptr *f_factors, /* a handle containing the address + pointing to the factored matrices */ +#else + void *f_factors, +#endif + int *info) + +{ +/* + * This routine can be called from Fortran. + * + * free all storage in the end + * + */ +#ifdef Have_SLU_ + SuperMatrix A, AC, B; + SuperMatrix *L, *U; + int *perm_r; /* row permutations from partial pivoting */ + int *perm_c; /* column permutation vector */ + int *etree; /* column elimination tree */ + SCformat *Lstore; + NCformat *Ustore; + int i, panel_size, permc_spec, relax; + trans_t trans; + double drop_tol = 0.0; + mem_usage_t mem_usage; + superlu_options_t options; + SuperLUStat_t stat; + factors_t *LUfactors; + + trans = NOTRANS; + /* Free the LU factors in the factors handle */ + LUfactors = (factors_t*) *f_factors; + SUPERLU_FREE (LUfactors->perm_r); + SUPERLU_FREE (LUfactors->perm_c); + Destroy_SuperNode_Matrix(LUfactors->L); + Destroy_CompCol_Matrix(LUfactors->U); + SUPERLU_FREE (LUfactors->L); + SUPERLU_FREE (LUfactors->U); + SUPERLU_FREE (LUfactors); + *info = 0; +#else + fprintf(stderr," SLU Not Configured, fix make.inc and recompile\n"); + *info=-1; +#endif +} + + diff --git a/psb_zsp_renum.f90 b/psb_zsp_renum.f90 new file mode 100644 index 00000000..7cdf5f83 --- /dev/null +++ b/psb_zsp_renum.f90 @@ -0,0 +1,458 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_zsp_renum(a,desc_a,blck,p,atmp,info) + use psb_base_mod + use psb_prec_type + implicit none + + ! .. array Arguments .. + type(psb_zspmat_type), intent(in) :: a,blck + type(psb_zspmat_type), intent(inout) :: atmp + type(psb_zbaseprc_type), intent(inout) :: p + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + + + character(len=20) :: name, ch_err + integer nztota, nztotb, nztmp, nzl, nnr, ir, mglob, mtype, n_row, & + & nrow_a,n_col, nhalo,lovr, ind, iind, pi,nr,ns,i,j,jj,k,kk + integer ::ictxt,np,me, err_act + integer, allocatable :: itmp(:), itmp2(:) + complex(kind(1.d0)), allocatable :: ztmp(:) + real(kind(1.d0)) :: t1,t2,t3,t4,t5,t6,mpi_wtime, t7, t8 + external mpi_wtime + + if (psb_get_errstatus().ne.0) return + info=0 + name='apply_renum' + call psb_erractionsave(err_act) + + ictxt=psb_cd_get_context(desc_a) + call psb_info(ictxt, me, np) + +!!!!!!!!!!!!!!!! CHANGE FOR NON-CSR A + ! + ! Renumbering type: + ! 1. Global column indices + ! (2. GPS band reduction disabled for the time being) + + if (p%iprcparm(iren_)==renum_glb_) then + atmp%m = a%m + blck%m + atmp%k = a%k + atmp%fida='CSR' + atmp%descra = 'GUN' + + ! This is the renumbering coherent with global indices.. + mglob = psb_cd_get_global_rows(desc_a) + ! + ! Remember: we have switched IA1=COLS and IA2=ROWS + ! Now identify the set of distinct local column indices + ! + + nnr = p%desc_data%matrix_data(psb_n_row_) + allocate(p%perm(nnr),p%invperm(nnr),itmp2(nnr),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + do k=1,nnr + itmp2(k) = p%desc_data%loc_to_glob(k) + enddo + ! + ! We want: NEW(I) = OLD(PERM(I)) + ! + call isrx(nnr,itmp2,p%perm) + + do k=1, nnr + p%invperm(p%perm(k)) = k + enddo + t3 = mpi_wtime() + + ! Build ATMP with new numbering. + nztmp=size(atmp%aspk) + allocate(itmp(max(8,atmp%m+2,nztmp+2)),ztmp(atmp%m),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + j = 1 + atmp%ia2(1) = 1 + do i=1, atmp%m + ir = p%perm(i) + + if (ir <= a%m ) then + + nzl = a%ia2(ir+1) - a%ia2(ir) + if (nzl > size(ztmp)) then + call psb_realloc(nzl,ztmp,info) + if(info/=0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + endif + jj = a%ia2(ir) + k=0 + do kk=1, nzl + if (a%ia1(jj+kk-1)<=atmp%m) then + k = k + 1 + ztmp(k) = a%aspk(jj+kk-1) + atmp%ia1(j+k-1) = p%invperm(a%ia1(jj+kk-1)) + endif + enddo + call isrx(k,atmp%ia1(j:j+k-1),itmp2) + do kk=1,k + atmp%aspk(j+kk-1) = ztmp(itmp2(kk)) + enddo + + else if (ir <= atmp%m ) then + + ir = ir - a%m + nzl = blck%ia2(ir+1) - blck%ia2(ir) + if (nzl > size(ztmp)) then + call psb_realloc(nzl,ztmp,info) + if(info/=0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + endif + jj = blck%ia2(ir) + k=0 + do kk=1, nzl + if (blck%ia1(jj+kk-1)<=atmp%m) then + k = k + 1 + ztmp(k) = blck%aspk(jj+kk-1) + atmp%ia1(j+k-1) = p%invperm(blck%ia1(jj+kk-1)) + endif + enddo + call isrx(k,atmp%ia1(j:j+k-1),itmp2) + do kk=1,k + atmp%aspk(j+kk-1) = ztmp(itmp2(kk)) + enddo + + else + write(0,*) 'Row index error 1 :',i,ir + endif + + j = j + k + atmp%ia2(i+1) = j + + enddo + + t4 = mpi_wtime() + + + deallocate(itmp,itmp2,ztmp) + + else if (p%iprcparm(iren_)==renum_gps_) then + + atmp%m = a%m + blck%m + atmp%k = a%k + atmp%fida='CSR' + atmp%descra = 'GUN' + do i=1, a%m + atmp%ia2(i) = a%ia2(i) + do j= a%ia2(i), a%ia2(i+1)-1 + atmp%ia1(j) = a%ia1(j) + enddo + enddo + atmp%ia2(a%m+1) = a%ia2(a%m+1) + nztota = atmp%ia2(a%m+1) -1 + if (blck%m>0) then + do i=1, blck%m + atmp%ia2(a%m+i) = nztota+blck%ia2(i) + do j= blck%ia2(i), blck%ia2(i+1)-1 + atmp%ia1(nztota+j) = blck%ia1(j) + enddo + enddo + atmp%ia2(atmp%m+1) = nztota+blck%ia2(blck%m+1) + endif + nztmp = atmp%ia2(atmp%m+1) - 1 + + + ! This is a renumbering with Gibbs-Poole-Stockmeyer + ! band reduction. Switched off for now. To be fixed, + ! gps_reduction should get p%perm. + + ! write(0,*) me,' Renumbering: realloc perms',atmp%m + call psb_realloc(atmp%m,p%perm,info) + if(info/=0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + call psb_realloc(atmp%m,p%invperm,info) + if(info/=0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocate(itmp(max(8,atmp%m+2,nztmp+2)),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + itmp(1:8) = 0 + ! write(0,*) me,' Renumbering: Calling Metis' + + ! write(0,*) size(p%av(u_pr_)%pl),size(p%av(l_pr_)%pr) + call gps_reduction(atmp%m,atmp%ia2,atmp%ia1,p%perm,p%invperm,info) + if(info/=0) then + info=4010 + ch_err='gps_reduction' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + ! write(0,*) me,' Renumbering: Done GPS' + ! call psb_barrier(ictxt) + do i=1, atmp%m + if (p%perm(i) /= i) then + write(0,*) me,' permutation is not identity ' + exit + endif + enddo + + + + do k=1, nnr + p%invperm(p%perm(k)) = k + enddo + t3 = mpi_wtime() + + ! Build ATMP with new numbering. + + allocate(itmp2(max(8,atmp%m+2,nztmp+2)),ztmp(atmp%m),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + j = 1 + atmp%ia2(1) = 1 + do i=1, atmp%m + ir = p%perm(i) + + if (ir <= a%m ) then + + nzl = a%ia2(ir+1) - a%ia2(ir) + if (nzl > size(ztmp)) then + call psb_realloc(nzl,ztmp,info) + if(info/=0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + endif + jj = a%ia2(ir) + k=0 + do kk=1, nzl + if (a%ia1(jj+kk-1)<=atmp%m) then + k = k + 1 + ztmp(k) = a%aspk(jj+kk-1) + atmp%ia1(j+k-1) = p%invperm(a%ia1(jj+kk-1)) + endif + enddo + call isrx(k,atmp%ia1(j:j+k-1),itmp2) + do kk=1,k + atmp%aspk(j+kk-1) = ztmp(itmp2(kk)) + enddo + + else if (ir <= atmp%m ) then + + ir = ir - a%m + nzl = blck%ia2(ir+1) - blck%ia2(ir) + if (nzl > size(ztmp)) then + call psb_realloc(nzl,ztmp,info) + if(info/=0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + endif + jj = blck%ia2(ir) + k=0 + do kk=1, nzl + if (blck%ia1(jj+kk-1)<=atmp%m) then + k = k + 1 + ztmp(k) = blck%aspk(jj+kk-1) + atmp%ia1(j+k-1) = p%invperm(blck%ia1(jj+kk-1)) + endif + enddo + call isrx(k,atmp%ia1(j:j+k-1),itmp2) + do kk=1,k + atmp%aspk(j+kk-1) = ztmp(itmp2(kk)) + enddo + + else + write(0,*) 'Row index error 1 :',i,ir + endif + + j = j + k + atmp%ia2(i+1) = j + + enddo + + t4 = mpi_wtime() + + + + deallocate(itmp,itmp2,ztmp) + + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +contains + + + subroutine gps_reduction(m,ia,ja,perm,iperm,info) + integer i,j,dgConn,Npnt,m + integer n,idpth,ideg,ibw2,ipf2 + integer,dimension(:) :: perm,iperm,ia,ja + integer, intent(out) :: info + + integer,dimension(:,:),allocatable::NDstk + integer,dimension(:),allocatable::iOld,renum,ndeg,lvl,lvls1,lvls2,ccstor + + character(len=20) :: name, ch_err + + if(psb_get_errstatus().ne.0) return + info=0 + name='gps_reduction' + call psb_erractionsave(err_act) + + + !--- Calcolo il massimo grado di connettivita'. + npnt = m + write(6,*) ' GPS su ',npnt + dgConn=0 + do i=1,m + dgconn = max(dgconn,(ia(i+1)-ia(i))) + enddo + !--- Il max valore di connettivita' e "dgConn" + + !--- Valori della common + n=Npnt !--- Numero di righe + iDeg=dgConn !--- Massima connettivita' + ! iDpth= !--- Numero di livelli non serve settarlo + + allocate(NDstk(Npnt,dgConn),stat=info) + if (info/=0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + else + write(0,*) 'gps_reduction first alloc OK' + endif + allocate(iOld(Npnt),renum(Npnt+1),ndeg(Npnt),lvl(Npnt),lvls1(Npnt),& + &lvls2(Npnt),ccstor(Npnt),stat=info) + if (info/=0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + else + write(0,*) 'gps_reduction 2nd alloc OK' + endif + + !--- Prepariamo il grafo della matrice + Ndstk(:,:)=0 + do i=1,Npnt + k=0 + do j = ia(i),ia(i+1) - 1 + if ((1<=ja(j)).and.( ja( j ) /= i ).and.(ja(j)<=npnt)) then + k = k+1 + Ndstk(i,k)=ja(j) + endif + enddo + ndeg(i)=k + enddo + + !--- Numerazione. + do i=1,Npnt + iOld(i)=i + enddo + write(0,*) 'gps_red : Preparation done' + !--- + !--- Chiamiamo funzione reduce. + call psb_gps_reduce(Ndstk,Npnt,iOld,renum,ndeg,lvl,lvls1, lvls2,ccstor,& + & ibw2,ipf2,n,idpth,ideg) + write(0,*) 'gps_red : Done reduce' + !--- Permutazione + perm(1:Npnt)=renum(1:Npnt) + !--- Inversa permutazione + do i=1,Npnt + iperm(perm(i))=i + enddo + !--- Puliamo tutto. + deallocate(NDstk,iOld,renum,ndeg,lvl,lvls1,lvls2,ccstor) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + + end subroutine gps_reduction + +end subroutine psb_zsp_renum diff --git a/psb_zumf_bld.f90 b/psb_zumf_bld.f90 new file mode 100644 index 00000000..c82ce39f --- /dev/null +++ b/psb_zumf_bld.f90 @@ -0,0 +1,211 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine psb_zumf_bld(a,desc_a,p,info) + use psb_base_mod + use psb_prec_type + implicit none + + type(psb_zspmat_type), intent(inout) :: a + type(psb_desc_type), intent(in) :: desc_a + type(psb_zbaseprc_type), intent(inout) :: p + integer, intent(out) :: info + + + type(psb_zspmat_type) :: blck, atmp + character(len=5) :: fmt + character :: upd='F' + integer :: i,j,nza,nzb,nzt,ictxt, me,np,err_act + integer :: i_err(5) + logical, parameter :: debug=.false. + character(len=20) :: name, ch_err + + interface psb_asmatbld + Subroutine psb_zasmatbld(ptype,novr,a,blk,desc_data,upd,desc_p,info,outfmt) + use psb_base_mod + use psb_prec_type + integer, intent(in) :: ptype,novr + Type(psb_zspmat_type), Intent(in) :: a + Type(psb_zspmat_type), Intent(inout) :: blk + Type(psb_desc_type), Intent(inout) :: desc_p + Type(psb_desc_type), Intent(in) :: desc_data + Character, Intent(in) :: upd + integer, intent(out) :: info + character(len=5), optional :: outfmt + end Subroutine psb_zasmatbld + end interface + + info=0 + name='psb_umf_bld' + call psb_erractionsave(err_act) + + ictxt = desc_A%matrix_data(psb_ctxt_) + call psb_info(ictxt, me, np) + + fmt = 'COO' + call psb_nullify_sp(blck) + call psb_nullify_sp(atmp) + + atmp%fida='COO' + if (Debug) then + write(0,*) me, 'UMFBLD: Calling csdp' + call psb_barrier(ictxt) + endif + + call psb_zcsdp(a,atmp,info) + if(info /= 0) then + info=4010 + ch_err='psb_zcsdp' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + nza = psb_sp_get_nnzeros(atmp) + nzb = psb_sp_get_nnzeros(a) + if (Debug) then + write(0,*) me, 'UMFBLD: Done csdp',info,nza,atmp%m,atmp%k,nzb + call psb_barrier(ictxt) + endif + call psb_asmatbld(p%iprcparm(p_type_),p%iprcparm(n_ovr_),a,& + & blck,desc_a,upd,p%desc_data,info,outfmt=fmt) + if(info /= 0) then + info=4010 + ch_err='psb_asmatbld' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + nzb = psb_sp_get_nnzeros(blck) + if (Debug) then + write(0,*) me, 'UMFBLD: Done asmatbld',info,nzb,blck%fida + call psb_barrier(ictxt) + endif + if (nzb > 0 ) then + if (size(atmp%aspk) +#ifdef Have_UMF_ +#include "umfpack.h" +#endif + +#ifdef LargeFptr +typedef long long fptr; /* 64-bit*/ +#else +typedef int fptr; /* 32-bit by default */ +#endif + +void +psb_zumf_factor_(int *n, int *nnz, + double *values, int *rowind, int *colptr, +#ifdef Have_UMF_ + fptr *symptr, + fptr *numptr, + +#else + void *symptr, + void *numptr, +#endif + int *info) + +{ + +#ifdef Have_UMF_ + double Info [UMFPACK_INFO], Control [UMFPACK_CONTROL]; + void *Symbolic, *Numeric ; + int i; + + + umfpack_zi_defaults(Control); + + for (i = 0; i <= *n; ++i) --colptr[i]; + for (i = 0; i < *nnz; ++i) --rowind[i]; + *info = umfpack_zi_symbolic (*n, *n, colptr, rowind, values, NULL, &Symbolic, + Control, Info); + + + if ( *info == UMFPACK_OK ) { + *info = 0; + } else { + printf("umfpack_zi_symbolic() error returns INFO= %d\n", *info); + *info = -11; + *numptr = (fptr) NULL; + return; + } + + *symptr = (fptr) Symbolic; + + *info = umfpack_zi_numeric (colptr, rowind, values, NULL, Symbolic, &Numeric, + Control, Info) ; + + + if ( *info == UMFPACK_OK ) { + *info = 0; + *numptr = (fptr) Numeric; + } else { + printf("umfpack_zi_numeric() error returns INFO= %d\n", *info); + *info = -12; + *numptr = (fptr) NULL; + } + + for (i = 0; i <= *n; ++i) ++colptr[i]; + for (i = 0; i < *nnz; ++i) ++rowind[i]; +#else + fprintf(stderr," UMF Not Configured, fix make.inc and recompile\n"); + *info=-1; +#endif +} + + +void +psb_zumf_solve_(int *itrans, int *n, + double *x, double *b, int *ldb, +#ifdef Have_UMF_ + fptr *numptr, + +#else + void *numptr, +#endif + int *info) + +{ +#ifdef Have_UMF_ + double Info [UMFPACK_INFO], Control [UMFPACK_CONTROL]; + void *Symbolic, *Numeric ; + int i,trans; + + + umfpack_di_defaults(Control); + Control[UMFPACK_IRSTEP]=0; + + + if (*itrans == 0) { + trans = UMFPACK_A; + } else if (*itrans ==1) { + trans = UMFPACK_At; + } else { + trans = UMFPACK_A; + } + + *info = umfpack_zi_solve(trans,NULL,NULL,NULL,NULL, + x,NULL,b,NULL,(void *) *numptr,Control,Info); + +#else + fprintf(stderr," UMF Not Configured, fix make.inc and recompile\n"); + *info=-1; +#endif + +} + + +void +psb_zumf_free_( +#ifdef Have_UMF_ + fptr *symptr, + fptr *numptr, + +#else + void *symptr, + void *numptr, +#endif + int *info) + +{ +#ifdef Have_UMF_ + void *Symbolic, *Numeric ; + Symbolic = (void *) *symptr; + Numeric = (void *) *numptr; + + umfpack_zi_free_numeric(&Numeric); + umfpack_zi_free_symbolic(&Symbolic); + *info=0; +#else + fprintf(stderr," UMF Not Configured, fix make.inc and recompile\n"); + *info=-1; +#endif +} + +